diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index d1e6631..9bdbc5b 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3617,6 +3617,23 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "Exceeding this will trigger a flush irrelevant of memory pressure condition."),
     HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1,
         "Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded."),
+    HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED(
+        "hive.vectorized.execution.groupby.native.enabled", true,
+        "This flag should be set to true to enable the native vectorization of queries using GroupBy.\n" +
+        "The default value is true."),
+    HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE(
+        "hive.test.vectorized.execution.groupby.native.override",
+        "none", new StringSet("none", "enable", "disable"),
+        "internal use only, used to override the hive.vectorized.execution.groupby.native.enabled\n" +
+        "setting. Using enable will force it on and disable will force it off.\n" +
+        "The default none means do nothing.",
+        true),
+    HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE(
+        "hive.test.vectorized.groupby.native.max.memory.available", -1,
+        "internal use only, used for creating different vectorized hash table sizes\n" +
+        "to exercise more logic.\n" +
+        "The default value is -1, which means don't use it.",
+        true),
     HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED("hive.vectorized.execution.reducesink.new.enabled", true,
         "This flag should be set to true to enable the new vectorization\n" +
         "of queries using ReduceSink.\ni" +
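[Editor's note: a minimal usage sketch, not part of the patch. Only the ConfVars names come from the hunk above; the class name here is hypothetical, and the accessors are the standard HiveConf API. In a session the first call is equivalent to SET hive.vectorized.execution.groupby.native.enabled=false;]

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class NativeGroupByConfSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Turn the native vectorized GroupBy off (it defaults to true).
        conf.setBoolVar(ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED, false);
        // Test-only override; accepts "none" (default), "enable", or "disable".
        conf.setVar(ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE, "enable");
        // Test-only hash table memory cap; -1 (the default) means it is unused.
        conf.setIntVar(ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE, 20000000);
        System.out.println(conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED));
      }
    }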
diff --git data/files/groupby_decimal64_1a.txt data/files/groupby_decimal64_1a.txt
new file mode 100644
index 0000000..dbe0d86
--- /dev/null
+++ data/files/groupby_decimal64_1a.txt
@@ -0,0 +1,18 @@
+55.33
+44.2
+435.33
+324.33
+324.33
+-0.342
+44.2
+55.3
+55.3
+0.0
+66.4
+23.22
+-87.2
+\N
+33.44
+55.3
+435.331
+-0.342
\ No newline at end of file
diff --git data/files/groupby_decimal64_1a_nonull.txt data/files/groupby_decimal64_1a_nonull.txt
new file mode 100644
index 0000000..16ae9e4
--- /dev/null
+++ data/files/groupby_decimal64_1a_nonull.txt
@@ -0,0 +1,17 @@
+55.33
+44.2
+435.33
+324.33
+324.33
+-0.342
+44.2
+55.3
+55.3
+0.0
+66.4
+23.22
+-87.2
+33.44
+55.3
+435.331
+-0.342
\ No newline at end of file
diff --git data/files/groupby_decimal64_1b.txt data/files/groupby_decimal64_1b.txt
new file mode 100644
index 0000000..c99fd34
--- /dev/null
+++ data/files/groupby_decimal64_1b.txt
@@ -0,0 +1,17 @@
+4143-07-08 10:53:27.252,3566.02
+5339-02-01 14:10:01.0,7286.29
+5339-02-01 14:10:01.0,2755.40
+2003-09-23 22:33:17.00003252,2516.50
+5397-07-13 07:12:32.000896438,16966.99
+4143-07-08 10:53:27.252,16966.0
+4143-07-08 10:53:27.252,10402
+2003-09-23 22:33:17.00003252,1735.22
+1966-08-16 13:36:50.1,645.07
+\N,15464.67
+1966-08-16 13:36:50.1,8925.82
+1966-08-16 13:36:50.1,11041.91
+7160-12-02 06:00:24.81,645.93
+1976-05-06 00:42:30.910786948,13831.90
+9075-06-13 16:20:09,9559.53
+1985-07-20 09:30:11.0,\N
+1999-10-03 16:59:10.396903939,2755.9
\ No newline at end of file
diff --git data/files/groupby_decimal64_1b_nonull.txt data/files/groupby_decimal64_1b_nonull.txt
new file mode 100644
index 0000000..974cb9d
--- /dev/null
+++ data/files/groupby_decimal64_1b_nonull.txt
@@ -0,0 +1,16 @@
+4143-07-08 10:53:27.252,3566.02
+5339-02-01 14:10:01.0,7286.29
+5339-02-01 14:10:01.0,2755.40
+2003-09-23 22:33:17.00003252,2516.50
+5397-07-13 07:12:32.000896438,16966.99
+4143-07-08 10:53:27.252,16966.0
+4143-07-08 10:53:27.252,10402
+2003-09-23 22:33:17.00003252,1735.22
+1966-08-16 13:36:50.1,645.07
+\N,15464.67
+1966-08-16 13:36:50.1,8925.82
+1966-08-16 13:36:50.1,11041.91
+7160-12-02 06:00:24.81,645.93
+1976-05-06 00:42:30.910786948,13831.90
+9075-06-13 16:20:09,9559.53
+1999-10-03 16:59:10.396903939,2755.9
\ No newline at end of file
diff --git data/files/groupby_long_1a.txt data/files/groupby_long_1a.txt
new file mode 100644
index 0000000..8cf831f
--- /dev/null
+++ data/files/groupby_long_1a.txt
@@ -0,0 +1,11 @@
+-5310365297525168078
+-6187919478609154811
+968819023021777205
+3313583664488247651
+-5206670856103795573
+\N
+-6187919478609154811
+1569543799237464101
+-6187919478609154811
+-8460550397108077433
+-6187919478609154811
diff --git data/files/groupby_long_1a_nonull.txt data/files/groupby_long_1a_nonull.txt
new file mode 100644
index 0000000..b2325ad
--- /dev/null
+++ data/files/groupby_long_1a_nonull.txt
@@ -0,0 +1,10 @@
+1569543799237464101
+-6187919478609154811
+968819023021777205
+-8460550397108077433
+-6187919478609154811
+-5310365297525168078
+-6187919478609154811
+-5206670856103795573
+3313583664488247651
+-6187919478609154811
diff --git data/files/groupby_long_1b.txt data/files/groupby_long_1b.txt
new file mode 100644
index 0000000..87c2b3c
--- /dev/null
+++ data/files/groupby_long_1b.txt
@@ -0,0 +1,13 @@
+\N
+31713
+31713
+31713
+31713
+32030
+31713
+-25394
+31713
+31713
+31713
+31713
+31713
diff --git data/files/groupby_long_1b_nonull.txt data/files/groupby_long_1b_nonull.txt
new file mode 100644
index 0000000..0b438a2
--- /dev/null
+++ data/files/groupby_long_1b_nonull.txt
@@ -0,0 +1,12 @@
+31713
+31713
+31713
+31713
+32030
+31713
+-25394
+31713
+31713
+31713
+31713
+31713
diff --git data/files/groupby_long_1c.txt data/files/groupby_long_1c.txt
new file mode 100644
index 0000000..2d13c26
--- /dev/null
+++ data/files/groupby_long_1c.txt
@@ -0,0 +1,11 @@
+1928928239,\N
+-1437463633,YYXPPCH
+-1437463633,TKTKGVGFW
+1725068083,MKSCCE
+1928928239,\N
+\N,ABBZ
+1928928239,AMKTIWQ
+-1437463633,JU
+1928928239,VAQHVRI
+-1437463633,SOWDWMS
+-1437463633,\N
diff --git data/files/groupby_long_1c_nonull.txt data/files/groupby_long_1c_nonull.txt
new file mode 100644
index 0000000..f6bc6e8
--- /dev/null
+++ data/files/groupby_long_1c_nonull.txt
@@ -0,0 +1,10 @@
+1928928239,\N
+-1437463633,YYXPPCH
+-1437463633,TKTKGVGFW
+1725068083,MKSCCE
+1928928239,\N
+1928928239,AMKTIWQ
+-1437463633,JU
+1928928239,VAQHVRI
+-1437463633,SOWDWMS
+-1437463633,\N
diff --git data/files/groupby_multi_1a.txt data/files/groupby_multi_1a.txt
new file mode 100644
index 0000000..e41458d
--- /dev/null
+++ data/files/groupby_multi_1a.txt
@@ -0,0 +1,56 @@
+2268-07-27,43
+1988-01-10,22
+2083-03-10,51
+2207-09-16,15
+2111-10-04,-81
+2088-05-07,-15
+1833-09-17,16
+2204-06-14,22
+1879-03-14,51
+2025-05-17,51
+2207-04-24,-92
+1809-10-10,-28
+1805-12-21,16
+2207-09-16,\N
+2194-06-19,-126
+1971-06-16,24
+2251-08-16,\N
+1845-11-11,-126
+1858-09-10,22
+2059-05-11,-39
+1892-05-06,-103
+2207-09-16,-13
+1937-09-06,-126
+1820-12-15,51
+2006-12-15,16
+1892-05-06,-121
+\N,-126
+2268-07-27,-12
+2268-07-27,114
+2151-11-20,16
+2268-07-27,118
+2029-11-21,-75
+1859-01-20,16
+1950-10-06,-39
+2185-07-27,51
+2207-09-16,\N
+1892-05-06,61
+2207-09-16,-105
+2268-07-27,-117
+2207-04-24,0
+2207-09-16,124
+2059-05-11,-39
+1805-12-21,16
+1805-12-21,16
+2249-12-20,51
+2207-09-16,116
+2207-09-16,122
+2064-09-04,-126
+1869-03-17,-126
+1804-02-16,-39
+1960-04-02,-75
+2086-09-20,-69
+\N,\N
+2196-04-12,22
+2251-08-16,-94
+2268-07-27,-12
\ No newline at end of file
diff --git data/files/groupby_multi_1a_nonull.txt data/files/groupby_multi_1a_nonull.txt
new file mode 100644
index 0000000..9542f64
--- /dev/null
+++ data/files/groupby_multi_1a_nonull.txt
@@ -0,0 +1,55 @@
+2268-07-27,43
+1988-01-10,22
+2083-03-10,51
+2207-09-16,15
+2111-10-04,-81
+2088-05-07,-15
+1833-09-17,16
+2204-06-14,22
+1879-03-14,51
+2025-05-17,51
+2207-04-24,-92
+1809-10-10,-28
+1805-12-21,16
+2207-09-16,\N
+2194-06-19,-126
+1971-06-16,24
+2251-08-16,\N
+1845-11-11,-126
+1858-09-10,22
+2059-05-11,-39
+1892-05-06,-103
+2207-09-16,-13
+1937-09-06,-126
+1820-12-15,51
+2006-12-15,16
+1892-05-06,-121
+\N,-126
+2268-07-27,-12
+2268-07-27,114
+2151-11-20,16
+2268-07-27,118
+2029-11-21,-75
+1859-01-20,16
+1950-10-06,-39
+2185-07-27,51
+2207-09-16,\N
+1892-05-06,61
+2207-09-16,-105
+2268-07-27,-117
+2207-04-24,0
+2207-09-16,124
+2059-05-11,-39
+1805-12-21,16
+1805-12-21,16
+2249-12-20,51
+2207-09-16,116
+2207-09-16,122
+2064-09-04,-126
+1869-03-17,-126
+1804-02-16,-39
+1960-04-02,-75
+2086-09-20,-69
+2196-04-12,22
+2251-08-16,-94
+2268-07-27,-12
\ No newline at end of file
diff --git data/files/groupby_serialize_1a.txt data/files/groupby_serialize_1a.txt
new file mode 100644
index 0000000..cae1ecc
--- /dev/null
+++ data/files/groupby_serialize_1a.txt
@@ -0,0 +1,17 @@
+2061-12-19 22:10:32.000628309
+\N
+2686-05-23 07:46:46.565832918
+2082-07-14 04:00:40.695380469
+2188-06-04 15:03:14.963259704
+2608-02-23 23:44:02.546440891
+2093-04-10 23:36:54.846
+2898-10-01 22:27:02.000871113
+2306-06-21 11:02:00.143124239
+\N
+\N
+2306-06-21 11:02:00.143124239
+2093-04-10 23:36:54.846
+\N
+2686-05-23 07:46:46.565832918
+2093-04-10 23:36:54.846
+2299-11-15 16:41:30.401
diff --git data/files/groupby_serialize_1a_nonull.txt data/files/groupby_serialize_1a_nonull.txt
new file mode 100644
index 0000000..0520a9a
--- /dev/null
+++ data/files/groupby_serialize_1a_nonull.txt
@@ -0,0 +1,13 @@
+2061-12-19 22:10:32.000628309
+2686-05-23 07:46:46.565832918
+2082-07-14 04:00:40.695380469
+2188-06-04 15:03:14.963259704
+2608-02-23 23:44:02.546440891
+2093-04-10 23:36:54.846
+2898-10-01 22:27:02.000871113
+2306-06-21 11:02:00.143124239
+2306-06-21 11:02:00.143124239
+2093-04-10 23:36:54.846
+2686-05-23 07:46:46.565832918
+2093-04-10 23:36:54.846
+2299-11-15 16:41:30.401
diff --git data/files/groupby_serialize_1b.txt data/files/groupby_serialize_1b.txt
new file mode 100644
index 0000000..c47bae0
--- /dev/null
+++ data/files/groupby_serialize_1b.txt
@@ -0,0 +1,47 @@
+2304-12-15 15:31:16,11101,YJCKKCR,-0.2
+2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537
+1957-03-06 09:57:31,-26373,NXLNNSO,2
+2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9
+2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691
+2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708
+2266-09-26 06:27:29.000284762,20223,EDYJJN,14
+2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019
+2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399
+2301-06-03 17:16:19,15332,ZVEUKC,0.5
+1980-09-13 19:57:15,\N,M,57650.7723
+2304-12-15 15:31:16,1301,T,-0.8
+2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605
+2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752
+2409-09-23 10:33:27,2638,XSXR,-9926693851
+1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712
+2512-10-06 03:03:03,-3465,VZQ,-49.51219
+2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023
+2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076
+2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5
+2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198
+2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481
+\N,\N,\N,-2207.3
+2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133
+2309-01-15 12:43:49,22821,ZMY,40.9
+2340-12-15 05:15:17.133588982,23663,HHTP,33383.8
+2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178
+2145-10-15 06:58:42.831,2638,\N,-9784.82
+2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918
+2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801
+2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318
+2938-12-21 23:35:59.498,29362,ZMY,0.88
+2304-12-15 15:31:16,-13125,JFYW,6.086657
+2808-07-09 02:10:11.928498854,-19598,FHFX,0.3
+2083-06-07 09:35:19.383,-26373,MR,-394.0867
+2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4
+2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257
+2338-02-12 09:30:07,20223,CTH,-6154.763054
+2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646
+2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288
+2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4
+2304-12-15 15:31:16,8650,RLNO,0.71351747335
+2688-02-06 20:58:42.000947837,20223,PAIY,67661.735
+\N,\N,\N,-2.4
+2512-10-06 03:03:03,-3465,VZQ,0.4458
+2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3
+2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396
\ No newline at end of file
diff --git data/files/groupby_serialize_1b_nonull.txt data/files/groupby_serialize_1b_nonull.txt
new file mode 100644
index 0000000..e640b42
--- /dev/null
+++ data/files/groupby_serialize_1b_nonull.txt
@@ -0,0 +1,66 @@
+2304-12-15 15:31:16,11101,YJCKKCR,-0.2
+2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537
+1957-03-06 09:57:31,-26373,NXLNNSO,2
+2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9
+2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691
+2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708
+2266-09-26 06:27:29.000284762,20223,EDYJJN,14
+2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019
+2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399
+2301-06-03 17:16:19,15332,ZVEUKC,0.5
+1980-09-13 19:57:15,\N,M,57650.7723
+2304-12-15 15:31:16,1301,T,-0.8
+2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605
+2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752
+2409-09-23 10:33:27,2638,XSXR,-9926693851
+1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712
+2512-10-06 03:03:03,-3465,VZQ,-49.51219
+2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023
+2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076
+2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5
+2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198
+2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481
+2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133
+2309-01-15 12:43:49,22821,ZMY,40.9
+2340-12-15 05:15:17.133588982,23663,HHTP,33383.8
+2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178
+2145-10-15 06:58:42.831,2638,\N,-9784.82
+2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918
+2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801
+2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318
+2938-12-21 23:35:59.498,29362,ZMY,0.88
+2304-12-15 15:31:16,-13125,JFYW,6.086657
+2808-07-09 02:10:11.928498854,-19598,FHFX,0.3
+2083-06-07 09:35:19.383,-26373,MR,-394.0867
+2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4
+2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257
+2338-02-12 09:30:07,20223,CTH,-6154.763054
+2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646
+2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288
+2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4
+2304-12-15 15:31:16,8650,RLNO,0.71351747335
+2688-02-06 20:58:42.000947837,20223,PAIY,67661.735
+2512-10-06 03:03:03,-3465,VZQ,0.4458
+2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3
+2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396
+2512-10-06 03:03:03,1560,X,-922.6951584107
+2396-04-06 15:39:02.404013577,29661,ZSMB,0.76718326
+2409-09-23 10:33:27,2638,XSXR,0.4
+2969-01-23 14:08:04.000667259,6689,TFGVOGPJF,-0.01
+2333-07-28 09:59:26,23196,RKSK,37872288434740893.5
+2409-09-23 10:33:27,2638,XSXR,-162.95
+2357-05-08 07:09:09.000482799,6226,ZSMB,-472
+2304-12-15 15:31:16,15090,G,-4319470286240016.3
+2304-12-15 15:31:16,1301,T,61.302
+2105-01-04 16:27:45,23100,ZSMB,-83.2328
+2242-08-04 07:51:46.905,20223,UCYXACQ,-0.26149
+2637-03-12 22:25:46.385,-17786,HYEGQ,-84.169614329419
+1931-12-04 11:13:47.269597392,23196,HVJCQMTQL,-9697532.8994
+2897-08-10 15:21:47.09,23663,XYUVBED,6370
+2888-05-08 08:36:55.182302102,5786,ZVEUKC,57.62175257788037
+2145-10-15 06:58:42.831,2638,UANGISEXR,-5996.306
+2462-12-16 23:11:32.633305644,-26373,CB,67.41799
+2396-04-06 15:39:02.404013577,29661,ZSMB,-5151598.347
+2304-12-15 15:31:16,15090,G,975
+2512-10-06 03:03:03,32099,ARNZ,-0.41
+2188-06-04 15:03:14.963259704,9468,AAA,2.75496352
\ No newline at end of file
diff --git data/files/groupby_string_1a.txt data/files/groupby_string_1a.txt
new file mode 100644
index 0000000..1cbcd05
--- /dev/null
+++ data/files/groupby_string_1a.txt
@@ -0,0 +1,13 @@
+FTWURVH
+QNCYBDW
+UA
+WXHJ
+\N
+WXHJ
+PXLD
+WXHJ
+PXLD
+WXHJ
+WXHJ
+MXGDMBD
+PXLD
diff --git data/files/groupby_string_1a_nonull.txt data/files/groupby_string_1a_nonull.txt
new file mode 100644
index 0000000..a6566f2
--- /dev/null
+++ data/files/groupby_string_1a_nonull.txt
@@ -0,0 +1,12 @@
+WXHJ
+WXHJ
+FTWURVH
+MXGDMBD
+UA
+WXHJ
+QNCYBDW
+PXLD
+PXLD
+WXHJ
+PXLD
+WXHJ
diff --git data/files/groupby_string_1c.txt data/files/groupby_string_1c.txt
new file mode 100644
index 0000000..f223da0
--- /dev/null
+++ data/files/groupby_string_1c.txt
@@ -0,0 +1,38 @@
+BDBMW,2278-04-27,2101-02-21 08:53:34.692
+FROPIK,2023-02-28,2467-05-11 06:04:13.426693647
+GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302
+MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592
+CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094
+,1985-01-22,2111-01-10 15:44:28
+IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888
+GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251
+\N,1865-11-08,2893-04-07 07:36:12
+BEP,2206-08-10,2331-10-09 10:59:51
+NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922
+\N,2250-04-22,2548-03-21 08:23:13.133573801
+ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799
+IWEZJHKE,\N,\N
+AARNZRVZQ,2002-10-23,2525-05-12 15:59:35
+BEP,2141-02-19,2521-06-09 01:20:07.121
+AARNZRVZQ,2000-11-13,2309-06-05 19:54:13
+LOTLS,1957-11-09,2092-06-07 06:42:30.000538454
+FROPIK,2124-10-01,2974-07-06 12:05:08.000146048
+KL,1980-09-22,2073-08-25 11:51:10.318
+\N,1915-02-22,2554-10-27 09:34:30
+WNGFTTY,1843-06-10,2411-01-28 20:03:59
+VNRXWQ,1883-02-06,2287-07-17 16:46:58.287
+QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266
+GOYJHW,1959-04-27,\N
+LOTLS,2099-08-04,2181-01-25 01:04:25.000030055
+CQMTQLI,2090-11-13,2693-03-17 16:19:55.82
+VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887
+LOTLS,2126-09-16,1977-12-15 15:28:56
+FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556
+,2021-02-21,2802-04-21 18:48:18.5933838
+ZNOUDCR,\N,1988-04-23 08:40:21
+FROPIK,2214-02-09,1949-08-18 17:14:38.000703738
+SDA,2196-04-12,2462-10-26 19:28:12.733
+WNGFTTY,2251-08-16,2649-12-21 18:30:42.498
+GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784
+FYW,1807-03-20,2305-08-17 01:32:44
+ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558
diff --git data/files/groupby_string_1c_nonull.txt data/files/groupby_string_1c_nonull.txt
new file mode 100644
index 0000000..6b97ef4
--- /dev/null
+++ data/files/groupby_string_1c_nonull.txt
@@ -0,0 +1,35 @@
+LOTLS,2126-09-16,1977-12-15 15:28:56
+MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592
+WNGFTTY,2251-08-16,2649-12-21 18:30:42.498
+QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266
+AARNZRVZQ,2002-10-23,2525-05-12 15:59:35
+BEP,2141-02-19,2521-06-09 01:20:07.121
+ZNOUDCR,\N,1988-04-23 08:40:21
+FROPIK,2023-02-28,2467-05-11 06:04:13.426693647
+GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784
+CQMTQLI,2090-11-13,2693-03-17 16:19:55.82
+BDBMW,2278-04-27,2101-02-21 08:53:34.692
+AARNZRVZQ,2000-11-13,2309-06-05 19:54:13
+FYW,1807-03-20,2305-08-17 01:32:44
+,2021-02-21,2802-04-21 18:48:18.5933838
+VNRXWQ,1883-02-06,2287-07-17 16:46:58.287
+FROPIK,2124-10-01,2974-07-06 12:05:08.000146048
+LOTLS,2099-08-04,2181-01-25 01:04:25.000030055
+BEP,2206-08-10,2331-10-09 10:59:51
+WNGFTTY,1843-06-10,2411-01-28 20:03:59
+LOTLS,1957-11-09,2092-06-07 06:42:30.000538454
+CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094
+GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302
+,1985-01-22,2111-01-10 15:44:28
+SDA,2196-04-12,2462-10-26 19:28:12.733
+ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799
+GOYJHW,1959-04-27,\N
+FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556
+KL,1980-09-22,2073-08-25 11:51:10.318
+ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558
+NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922
+FROPIK,2214-02-09,1949-08-18 17:14:38.000703738
+IWEZJHKE,\N,\N
+GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251
+IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888
+VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java
index ca76e6c..859257e 100644
--- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java
@@ -20,6 +20,9 @@
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
+import org.openjdk.jmh.profile.LinuxPerfAsmProfiler;
+import org.openjdk.jmh.profile.LinuxPerfNormProfiler;
+import org.openjdk.jmh.profile.LinuxPerfProfiler;
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
@@ -215,6 +218,9 @@ public void setup() throws Exception {
   public static void main(String[] args) throws RunnerException {
     Options opt = new OptionsBuilder()
         .include(".*" + MapJoinMultiKeyBench.class.getSimpleName() + ".*")
+        .addProfiler(LinuxPerfProfiler.class)
+        .addProfiler(LinuxPerfNormProfiler.class)
+        .addProfiler(LinuxPerfAsmProfiler.class)
         .build();
     new Runner(opt).run();
   }
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java
index aa88297..919cea4 100644
--- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java
@@ -25,9 +25,13 @@
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.openjdk.jmh.annotations.Param;
 
 public abstract class MapJoinMultiKeyBenchBase extends AbstractMapJoin {
-
+
+  @Param("100000") // 100,000
+  protected int rowCount;
+
   public void doSetup(VectorMapJoinVariation vectorMapJoinVariation,
       MapJoinTestImplementation mapJoinImplementation) throws Exception {
 
@@ -35,8 +39,6 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation,
 
     long seed = 2543;
 
-    int rowCount = 100000; // 100,000.
-
     String[] bigTableColumnNames = new String[] {"b1", "b2", "b3"};
     TypeInfo[] bigTableTypeInfos =
         new TypeInfo[] {
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java
index e13db96..73c6ec5 100644
--- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java
@@ -20,8 +20,12 @@
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
+import org.openjdk.jmh.profile.LinuxPerfAsmProfiler;
+import org.openjdk.jmh.profile.LinuxPerfNormProfiler;
+import org.openjdk.jmh.profile.LinuxPerfProfiler;
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.Level;
 import org.openjdk.jmh.annotations.State;
 import org.openjdk.jmh.runner.Runner;
 import org.openjdk.jmh.runner.RunnerException;
@@ -46,7 +50,7 @@
   public static class MapJoinOneLongKeyInnerRowModeHashMapBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_HASH_MAP);
     }
@@ -54,7 +58,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyInnerRowModeOptimized_Bench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_OPTIMIZED);
     }
@@ -62,7 +66,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyInnerVectorPassThrough_Bench
      extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.VECTOR_PASS_THROUGH);
     }
@@ -70,7 +74,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyInnerNativeVectorOptimizedBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED);
     }
@@ -78,7 +82,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyInnerNativeVectorFastBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_FAST);
     }
@@ -88,7 +92,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyInnerBigOnlyRowModeHashMapBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_HASH_MAP);
     }
@@ -96,7 +100,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyInnerBigOnlyRowModeOptimized_Bench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_OPTIMIZED);
     }
@@ -104,7 +108,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyInnerBigOnlyVectorPassThrough_Bench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.VECTOR_PASS_THROUGH);
     }
@@ -112,7 +116,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyInnerBigOnlyNativeVectorOptimizedBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED);
     }
@@ -120,7 +124,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyInnerBigOnlyNativeVectorFastBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.NATIVE_VECTOR_FAST);
     }
@@ -130,7 +134,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyLeftSemiRowModeHashMapBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_HASH_MAP);
     }
@@ -138,7 +142,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyLeftSemiRowModeOptimized_Bench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_OPTIMIZED);
     }
@@ -146,7 +150,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyLeftSemiVectorPassThrough_Bench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.VECTOR_PASS_THROUGH);
     }
@@ -154,7 +158,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyLeftSemiNativeVectorOptimizedBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED);
     }
@@ -162,7 +166,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyLeftSemiNativeVectorFastBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_FAST);
     }
@@ -172,7 +176,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyOuterRowModeHashMapBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.ROW_MODE_HASH_MAP);
     }
@@ -188,7 +192,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyOuterVectorPassThrough_Bench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.VECTOR_PASS_THROUGH);
     }
@@ -196,7 +200,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyOuterNativeVectorOptimizedBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED);
     }
@@ -204,7 +208,7 @@ public void setup() throws Exception {
   public static class MapJoinOneLongKeyOuterNativeVectorFastBench
       extends MapJoinOneLongKeyBenchBase {
 
-    @Setup
+    @Setup(Level.Invocation)
     public void setup() throws Exception {
       doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_FAST);
     }
@@ -215,6 +219,9 @@ public void setup() throws Exception {
   public static void main(String[] args) throws RunnerException {
     Options opt = new OptionsBuilder()
         .include(".*" + MapJoinOneLongKeyBench.class.getSimpleName() + ".*")
+        .addProfiler(LinuxPerfProfiler.class)
+        .addProfiler(LinuxPerfNormProfiler.class)
+        .addProfiler(LinuxPerfAsmProfiler.class)
         .build();
     new Runner(opt).run();
   }
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java
index 60b2890..80d3787 100644
--- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java
@@ -25,9 +25,13 @@
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.openjdk.jmh.annotations.Param;
 
 public abstract class MapJoinOneLongKeyBenchBase extends AbstractMapJoin {
-
+
+  @Param("10000000") // 10,000,000
+  protected int rowCount;
+
   public void doSetup(VectorMapJoinVariation vectorMapJoinVariation,
       MapJoinTestImplementation mapJoinImplementation) throws Exception {
 
@@ -35,8 +39,6 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation,
 
     long seed = 2543;
 
-    int rowCount = 10000000; // 10,000,000.
-
     String[] bigTableColumnNames = new String[] {"number1"};
     TypeInfo[] bigTableTypeInfos =
         new TypeInfo[] {
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java
index 6a78a9f..c3307aa 100644
--- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java
@@ -20,6 +20,9 @@
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
+import org.openjdk.jmh.profile.LinuxPerfAsmProfiler;
+import org.openjdk.jmh.profile.LinuxPerfNormProfiler;
+import org.openjdk.jmh.profile.LinuxPerfProfiler;
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
@@ -215,6 +218,9 @@ public void setup() throws Exception {
   public static void main(String[] args) throws RunnerException {
     Options opt = new OptionsBuilder()
         .include(".*" + MapJoinOneStringKeyBench.class.getSimpleName() + ".*")
+        .addProfiler(LinuxPerfProfiler.class)
+        .addProfiler(LinuxPerfNormProfiler.class)
+        .addProfiler(LinuxPerfAsmProfiler.class)
         .build();
     new Runner(opt).run();
   }
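[Editor's note: a usage sketch, not part of the patch. With rowCount now a JMH @Param, the table size can be swept without recompiling; the benchmark name and the default values come from the diffs above, the sketch class name is hypothetical, and the rest is standard JMH API.]

    import org.openjdk.jmh.runner.Runner;
    import org.openjdk.jmh.runner.RunnerException;
    import org.openjdk.jmh.runner.options.Options;
    import org.openjdk.jmh.runner.options.OptionsBuilder;

    public class RowCountSweepSketch {
      public static void main(String[] args) throws RunnerException {
        // Sweep the @Param rowCount field; JMH runs the matched benchmarks once
        // per value. Command-line equivalent: -p rowCount=100000,1000000,10000000
        Options opt = new OptionsBuilder()
            .include(".*MapJoinOneLongKeyBench.*")
            .param("rowCount", "100000", "1000000", "10000000")
            .build();
        new Runner(opt).run();
      }
    }

Note also the @Setup(Level.Invocation) change above: JMH then re-runs doSetup before every benchmark invocation, so each measurement sees freshly built map-join tables, at the cost of the per-invocation setup overhead that the JMH documentation warns about.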
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java
index 937ede1..69a323a 100644
--- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java
@@ -25,9 +25,13 @@
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.openjdk.jmh.annotations.Param;
 
 public abstract class MapJoinOneStringKeyBenchBase extends AbstractMapJoin {
-
+
+  @Param("100000") // 100,000
+  protected int rowCount;
+
   public void doSetup(VectorMapJoinVariation vectorMapJoinVariation,
       MapJoinTestImplementation mapJoinImplementation) throws Exception {
 
@@ -35,8 +39,6 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation,
 
     long seed = 2543;
 
-    int rowCount = 100000; // 100,000.
-
     String[] bigTableColumnNames = new String[] {"b1"};
     TypeInfo[] bigTableTypeInfos =
         new TypeInfo[] {
diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java
index 1f87f8d..c581dff 100644
--- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java
@@ -38,6 +38,11 @@
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate.CountAggregateKind;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -66,6 +71,12 @@ public class VectorGroupByOperatorBench extends AbstractOperatorBench {
 
   @Param({
+    "original",
+    "native"
+  })
+  private String implementation;
+
+  @Param({
     "true",
     "false"
   })
@@ -93,6 +104,7 @@
   @Param({
     "count",
+    // "countStar",
     "min",
     "max",
     "sum",
@@ -109,6 +121,7 @@
   @Param({
     "bigint",
+    "date",
     "double",
     "string",
     "decimal(7,2)", // to use this via command line arg "decimal(7_2)"
@@ -118,7 +131,7 @@
   private String dataType;
 
   private Random rand = new Random(1234);
-  private VectorGroupByOperator vgo;
+  private Operator vgo;
   private VectorizedRowBatch vrb;
   private int size = VectorizedRowBatch.DEFAULT_SIZE;
@@ -135,10 +148,62 @@ public void setup() {
     VectorizationContext ctx = new VectorizationContext("name", ImmutableList.of("A"));
     GroupByDesc desc = buildGroupByDescType(aggregation, evalMode, "A", typeInfo, processMode);
     Operator groupByOp = OperatorFactory.get(new CompilationOpContext(), desc);
-    VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
+    VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) desc.getVectorDesc();
     vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);
-    vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
-    vgo.initialize(new Configuration(), null);
+    if (implementation == null || implementation.equalsIgnoreCase("original")) {
+      vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
+      vgo.initialize(new Configuration(), null);
+    } else if (implementation.equalsIgnoreCase("native")) {
+      if (!aggregation.equalsIgnoreCase("count")) {
+        System.out.println("Only the count aggregation is supported by the native implementation");
+        System.exit(0);
+      }
+      VectorGroupByInfo vectorGroupByInfo = new VectorGroupByInfo();
+      vectorGroupByInfo.setAggregationVariation(AggregationVariation.HASH_COUNT);
+      final CountAggregateKind countAggregateKind;
+      // if (desc.getAggregators().get(0).getParameters().size() == 0) {
+      //   countAggregateKind = CountAggregateKind.COUNT_STAR;
+      // } else {
+      countAggregateKind = CountAggregateKind.COUNT_KEY;
+      // }
+      vectorGroupByInfo.setCountAggregate(
+          new CountAggregate(countAggregateKind));
+
+      final HashTableKeyType hashTableKeyType;
+      switch (dataType) {
+      case "bigint":
+      case "date":
+        hashTableKeyType = HashTableKeyType.LONG;
+        break;
+      case "string":
+        hashTableKeyType = HashTableKeyType.STRING;
+        break;
+      default:
+        hashTableKeyType = HashTableKeyType.SINGLE_KEY;
+        break;
+      }
+      vectorGroupByInfo.setHashTableKeyType(hashTableKeyType);
+      vectorGroupByInfo.setTestGroupByMaxMemoryAvailable(20000000);
+
+      vectorGroupByDesc.setVectorGroupByInfo(vectorGroupByInfo);
+
+      String issue =
+          Vectorizer.doVectorizeGroupByOperatorPreparation(
+              groupByOp, ctx, vectorGroupByDesc);
+      if (issue != null) {
+        System.out.println(issue);
+        System.exit(0);
+      }
+      vgo =
+          Vectorizer.specializeGroupByOperator(
+              groupByOp, ctx, (GroupByDesc) groupByOp.getConf(), vectorGroupByDesc);
+      vgo.initialize(new Configuration(), null);
+    } else {
+      System.out.println("Unknown implementation " + implementation);
+      System.exit(0);
+    }
+    System.out.println("implementation class " + vgo.getClass().getSimpleName());
   } catch (Exception e) {
     // likely unsupported combination of params
     // https://bugs.openjdk.java.net/browse/CODETOOLS-7901296 is not available yet to skip benchmark cleanly
@@ -162,6 +227,11 @@ private GroupByDesc buildGroupByDescType(
     outputColumnNames.add("_col0");
 
     GroupByDesc desc = new GroupByDesc();
+    ArrayList keys = new ArrayList();
+    keys.add(
+        new ExprNodeColumnDesc(
+            dataType, "A", "table", false));
+    desc.setKeys(keys);
     desc.setVectorDesc(new VectorGroupByDesc());
     desc.setOutputColumnNames(outputColumnNames);
@@ -191,6 +261,9 @@
       GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator =
           (GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator();
       udafBloomFilterEvaluator.setHintEntries(10000);
+    } else if (aggregate.equals("countStar")) {
+      aggregate = "count";
+      params = new ArrayList();
     }
     agg.setGenericUDAFName(aggregate);
     agg.setMode(mode);
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index fdd8ecc..a227ed2 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -279,7 +279,9 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_groupby4.q,\
   vector_groupby6.q,\
   vector_groupby_3.q,\
+  vector_groupby_singlekey.q,\
   vector_groupby_mapjoin.q,\
+  vector_groupby_multikey.q,\
   vector_groupby_reduce.q,\
   vector_grouping_sets.q,\
   vector_if_expr.q,\
diff --git ql/pom.xml ql/pom.xml
index d73deba..8ccd09d 100644
--- ql/pom.xml
+++ ql/pom.xml
@@ -865,6 +865,7 @@
                   classpath="${compile.classpath}"/>
+
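[Editor's note: the element added to ql/pom.xml above presumably wires the GroupByOperatorTemplates directory into the build's vectorization code-generation step; its XML payload did not survive extraction and is left blank. Judging from the #BEGIN_LINES/#IF markers in the template files that follow, each named block is spliced into a generated operator class with only the sections guarded by the matching key variation kept. A hedged sketch, with assumed local names and illustrative only (not actual generator output), of roughly what the LONG_KEY snippets compose into:]

    // Hedged sketch: how COMMON_GET_NEXT_KEY, COMMON_IF_NEXT_EQUALS_CURRENT, and
    // COMMON_NEW_CURRENT_KEY fit together for the LONG_KEY, no-NULLs, physical case.
    final class LongKeySeriesLoopSketch {

      // Returns the number of key series flushed; a real operator would instead do a
      // COUNT_KEY_FIND_OR_CREATE_KEY-style hash table update at each flush point.
      static int processNoNullsPhysical(long[] keyVector, int batchSize) {
        int seriesFlushed = 0;
        long currentKey = keyVector[0];   // COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES
        long count = 1;
        for (int batchIndex = 1; batchIndex < batchSize; batchIndex++) {
          final long nextKey = keyVector[batchIndex];   // COMMON_GET_NEXT_KEY
          if (currentKey == nextKey) {                  // COMMON_IF_NEXT_EQUALS_CURRENT
            count++;
          } else {
            seriesFlushed++;                            // flush (currentKey, count) here
            currentKey = nextKey;                       // COMMON_NEW_CURRENT_KEY
            count = 1;
          }
        }
        seriesFlushed++;                                // flush the final series
        return seriesFlushed;
      }

      public static void main(String[] args) {
        long[] keys = {7, 7, 7, 3, 3, 9};
        System.out.println(processNoNullsPhysical(keys, keys.length));  // prints 3
      }
    }

The design point of the series logic is that equal adjacent keys are counted in a local, and the hash table is touched once per series rather than once per row.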
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCommonLines.txt
new file mode 100644
index 0000000..fa1679e
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCommonLines.txt
@@ -0,0 +1,291 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to any operator variation.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Any key variation specific transient variables.
+#COMMENT
+#BEGIN_LINES COMMON_KEY_VARIATION_TRANSIENT
+#IF SINGLE_KEY||MULTI_KEY
+  // Object that can take the column(s) in a row of a vectorized row batch and serialize them.
+  // The key is not NULL.
+  private transient VectorSerializeRow<BinarySortableSerializeWrite> keyVectorSerializeWrite;
+
+  // The BinarySortable serialization of the current key.
+  private transient Output currentKeyOutput;
+
+  // The BinarySortable serialization of the next key for a possible series of equal keys.
+  private transient Output nextKeyOutput;
+
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Any single key variation specific Operator import code lines.
+#COMMENT
+#BEGIN_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS
+#IF STRING_KEY
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Helpful variables for accessing the key values for the LONG and STRING variations.
+#COMMENT (None needed for SINGLE_KEY or MULTI_KEY)
+#COMMENT
+#BEGIN_LINES COMMON_KEY_VECTOR_VARIABLES
+#IF LONG_KEY
+    long[] keyVector = keyColVector.vector;
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    final byte[][] keyVector = keyColVector.vector;
+    final int[] keyStart = keyColVector.start;
+    final int[] keyLength = keyColVector.length;
+#ENDIF STRING_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Current key values for logical (i.e. selectedInUse) and the batch's keys have no
+#COMMENT NULLs case. All variations.
+#COMMENT
+#BEGIN_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+    final int firstBatchIndex = selected[0];
+#IF LONG_KEY
+    long currentKey = keyVector[firstBatchIndex];
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    byte[] currentKey = keyVector[firstBatchIndex];
+    int currentKeyStart = keyStart[firstBatchIndex];
+    int currentKeyLength = keyLength[firstBatchIndex];
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+    keyVectorSerializeWrite.setOutput(currentKeyOutput);
+    keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex);
+    byte[] currentKey = currentKeyOutput.getData();
+    int currentKeyLength = currentKeyOutput.getLength();
+#ENDIF SINGLE_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Current key values for logical (i.e. selectedInUse) and the batch's keys may have
+#COMMENT NULLs case. All variations.
+#COMMENT
+#BEGIN_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+    boolean[] keyColIsNull = keyColVector.isNull;
+    boolean currKeyIsNull;
+
+#IF LONG_KEY
+    long currentKey;
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    byte[] currentKey;
+    int currentKeyStart;
+    int currentKeyLength;
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+    byte[] currentKey;
+    int currentKeyLength;
+#ENDIF SINGLE_KEY
+    final int firstBatchIndex = selected[0];
+    if (keyColIsNull[firstBatchIndex]) {
+      currKeyIsNull = true;
+#IF LONG_KEY
+      currentKey = 0;
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      currentKey = null;
+      currentKeyStart = 0;
+      currentKeyLength = 0;
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+      currentKey = null;
+      currentKeyLength = 0;
+#ENDIF SINGLE_KEY
+    } else {
+      currKeyIsNull = false;
+#IF LONG_KEY
+      currentKey = keyVector[firstBatchIndex];
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      currentKey = keyVector[firstBatchIndex];
+      currentKeyStart = keyStart[firstBatchIndex];
+      currentKeyLength = keyLength[firstBatchIndex];
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+      keyVectorSerializeWrite.setOutput(currentKeyOutput);
+      keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex);
+      currentKey = currentKeyOutput.getData();
+      currentKeyLength = currentKeyOutput.getLength();
+#ENDIF SINGLE_KEY
+    }
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Current key values for physical (i.e. NOT selectedInUse) and the batch's keys have no
+#COMMENT NULLs case. All variations.
+#COMMENT
+#BEGIN_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+#IF LONG_KEY
+    long currentKey = keyVector[0];
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    byte[] currentKey = keyVector[0];
+    int currentKeyStart = keyStart[0];
+    int currentKeyLength = keyLength[0];
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+    keyVectorSerializeWrite.setOutput(currentKeyOutput);
+    keyVectorSerializeWrite.serializeWrite(batch, 0);
+    byte[] currentKey = currentKeyOutput.getData();
+    int currentKeyLength = currentKeyOutput.getLength();
+#ENDIF SINGLE_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Current key values for physical (i.e. NOT selectedInUse) and the batch's keys may have
+#COMMENT NULLs case. All variations.
+#COMMENT
+#BEGIN_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+    boolean[] keyColIsNull = keyColVector.isNull;
+    boolean currKeyIsNull;
+
+#IF LONG_KEY
+    long currentKey;
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    byte[] currentKey;
+    int currentKeyStart;
+    int currentKeyLength;
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+    byte[] currentKey;
+    int currentKeyLength;
+#ENDIF SINGLE_KEY
+    if (keyColIsNull[0]) {
+      currKeyIsNull = true;
+#IF LONG_KEY
+      currentKey = 0;
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      currentKey = null;
+      currentKeyStart = 0;
+      currentKeyLength = 0;
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+      currentKey = null;
+      currentKeyLength = 0;
+#ENDIF SINGLE_KEY
+    } else {
+      currKeyIsNull = false;
+#IF LONG_KEY
+      currentKey = keyVector[0];
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      currentKey = keyVector[0];
+      currentKeyStart = keyStart[0];
+      currentKeyLength = keyLength[0];
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+      keyVectorSerializeWrite.setOutput(currentKeyOutput);
+      keyVectorSerializeWrite.serializeWrite(batch, 0);
+      currentKey = currentKeyOutput.getData();
+      currentKeyLength = currentKeyOutput.getLength();
+#ENDIF SINGLE_KEY
+    }
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Get next key value at batchIndex.
+#COMMENT All variations.
+#COMMENT
+#BEGIN_LINES COMMON_GET_NEXT_KEY
+#IF LONG_KEY
+    final long nextKey = keyVector[batchIndex];
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    byte[] nextKey = keyVector[batchIndex];
+    final int nextKeyStart = keyStart[batchIndex];
+    final int nextKeyLength = keyLength[batchIndex];
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+    keyVectorSerializeWrite.setOutput(nextKeyOutput);
+    keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
+    final byte[] nextKey = nextKeyOutput.getData();
+    final int nextKeyLength = nextKeyOutput.getLength();
+#ENDIF SINGLE_KEY
+#IF MULTI_KEY
+    Not Applicable -- see COMMON_MULTI_KEY_GET_NEXT_KEY instead.
+#ENDIF MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT IF statement next key value equals current key value for all variations.
+#COMMENT
+#BEGIN_LINES COMMON_IF_NEXT_EQUALS_CURRENT
+#IF LONG_KEY
+    if (currentKey == nextKey) {
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    if (StringExpr.equal(
+        currentKey, currentKeyStart, currentKeyLength,
+        nextKey, nextKeyStart, nextKeyLength)) {
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+    if (StringExpr.equal(
+        currentKey, 0, currentKeyLength,
+        nextKey, 0, nextKeyLength)) {
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT ELSE IF statement next key value equals current key value for all variations.
+#COMMENT
+#BEGIN_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT
+#IF LONG_KEY
+    } else if (currentKey == nextKey) {
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    } else if (StringExpr.equal(
+        currentKey, currentKeyStart, currentKeyLength,
+        nextKey, nextKeyStart, nextKeyLength)) {
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+    } else if (StringExpr.equal(
+        currentKey, 0, currentKeyLength,
+        nextKey, 0, nextKeyLength)) {
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Remember the next key value as the current key value. All variations.
+#COMMENT
+#BEGIN_LINES COMMON_NEW_CURRENT_KEY
+    currentKey = nextKey;
+#IF STRING_KEY
+    currentKeyStart = nextKeyStart;
+    currentKeyLength = nextKeyLength;
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+    currentKeyLength = nextKeyLength;
+    final Output tempOutput = nextKeyOutput;
+    nextKeyOutput = currentKeyOutput;
+    currentKeyOutput = tempOutput;
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountColumnTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountColumnTableLines.txt
new file mode 100644
index 0000000..7eddb23
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountColumnTableLines.txt
@@ -0,0 +1,67 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to Single COUNT(non-key-column) aggregations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- find or create the hash table entry and
+#COMMENT add or initialize it with the count. All variations.
+#COMMENT
+#BEGIN_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY
+#IF LONG_KEY
+    findOrCreateLongKeyZeroCount(
+        currentKey,
+        HashCodeUtil.calculateLongHashCode(currentKey),
+        count);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    findOrCreateBytesKeyCount(
+        currentKey, currentKeyStart, currentKeyLength,
+        HashCodeUtil.calculateBytesHashCode(
+            currentKey, currentKeyStart, currentKeyLength),
+        count);
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+    findOrCreateBytesKeyCount(
+        currentKey, 0, currentKeyLength,
+        HashCodeUtil.calculateBytesHashCode(
+            currentKey, 0, currentKeyLength),
+        count);
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- create the hash table entry if necessary;
+#COMMENT ignore it if it is already present since the count is 0 in this case. All variations.
+#COMMENT +#BEGIN_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS +#IF LONG_KEY + findOrCreateLongKeyZeroCount( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + 0); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKeyCount( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + 0); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + findOrCreateBytesKeyCount( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + 0); +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountKeyTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountKeyTableLines.txt new file mode 100644 index 0000000..2f5915e --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountKeyTableLines.txt @@ -0,0 +1,40 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to Single COUNT(key-column) aggregation. +#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- find or create the hash table entry and +#COMMENT add or initialize it with the count. All variations. +#COMMENT +#BEGIN_LINES COUNT_KEY_FIND_OR_CREATE_KEY +#IF LONG_KEY + findOrCreateLongKeyNonZeroCount( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKeyCount( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + findOrCreateBytesKeyCount( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountStarTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountStarTableLines.txt new file mode 100644 index 0000000..127756f --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountStarTableLines.txt @@ -0,0 +1,40 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to Single COUNT(*) aggregation. 
+#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- find or create the hash table entry and +#COMMENT add or initialize it with the count. All variations. +#COMMENT +#BEGIN_LINES COUNT_STAR_FIND_OR_CREATE_KEY +#IF LONG_KEY + findOrCreateLongKeyNonZeroCount( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKeyCount( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + findOrCreateBytesKeyCount( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDecimal64KeyOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDecimal64KeyOperator.txt new file mode 100644 index 0000000..047b560 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDecimal64KeyOperator.txt @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a DECIMAL_64 Native Vectorized GroupBy. 
+ */
+public class <ClassName> extends <BaseClassName> {
+
+  private static final long serialVersionUID = 1L;
+
+  public <ClassName>() {
+    super();
+  }
+
+  public <ClassName>(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+}
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDuplicateReductionTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDuplicateReductionTableLines.txt
new file mode 100644
index 0000000..94ab2d0
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDuplicateReductionTableLines.txt
@@ -0,0 +1,41 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to Duplicate Reduction operator variations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- find or create the hash table entry.
+#COMMENT All variations.
+#COMMENT
+#BEGIN_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY
+#IF LONG_KEY
+        if (currentKey == 0) {
+          haveZeroKey = true;
+        } else {
+          createOrIgnoreLongDuplicateReductionKey(
+              currentKey,
+              HashCodeUtil.calculateLongHashCode(currentKey));
+        }
+#ENDIF LONG_KEY
+#IF STRING_KEY
+        createOrIgnoreBytesDuplicateReductionKey(
+            currentKey, currentKeyStart, currentKeyLength,
+            HashCodeUtil.calculateBytesHashCode(
+                currentKey, currentKeyStart, currentKeyLength));
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+        createOrIgnoreBytesDuplicateReductionKey(
+            currentKey, 0, currentKeyLength,
+            HashCodeUtil.calculateBytesHashCode(
+                currentKey, 0, currentKeyLength));
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCommonLines.txt
new file mode 100644
index 0000000..8bb0bb4
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCommonLines.txt
@@ -0,0 +1,108 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to MULTI_KEY variations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Any multi-key variation specific initializeOp code lines.
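The MULTI_KEY common lines that follow take a different approach from the long-key case: each row's tuple of key columns is serialized into a single byte array (VectorSerializeRow writing through BinarySortableSerializeWrite, initialized in MULTI_KEY_VARIATION_INITIALIZE_OP below), after which "same group key" is plain byte equality. Stripped of the template machinery, the comparison the generated loops perform amounts to the following (helper name invented for illustration):

    // Once keys are serialized, run detection over multi-column keys reduces to
    // comparing the serialized byte ranges of the current and next row.
    static boolean sameSerializedKey(byte[] currentKey, int currentKeyLength,
        byte[] nextKey, int nextKeyLength) {
      if (currentKeyLength != nextKeyLength) {
        return false;
      }
      for (int i = 0; i < currentKeyLength; i++) {
        if (currentKey[i] != nextKey[i]) {
          return false;
        }
      }
      return true;
    }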
+#COMMENT
+#BEGIN_LINES MULTI_KEY_VARIATION_INITIALIZE_OP
+
+    final int size = groupByKeyExpressions.length;
+    keyVectorSerializeWrite =
+        new VectorSerializeRow(
+            new BinarySortableSerializeWrite(size));
+
+    TypeInfo[] typeInfos = new TypeInfo[size];
+    int[] columnMap = new int[size];
+    for (int i = 0; i < size; i++) {
+      VectorExpression keyExpr = groupByKeyExpressions[i];
+      typeInfos[i] = keyExpr.getOutputTypeInfo();
+      columnMap[i] = keyExpr.getOutputColumnNum();
+    }
+    keyVectorSerializeWrite.init(typeInfos, columnMap);
+
+    currentKeyOutput = new Output();
+    nextKeyOutput = new Output();
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Any multi-key variation specific next key code lines.
+#COMMENT
+#BEGIN_LINES COMMON_MULTI_KEY_GET_NEXT_KEY
+      final boolean nextKeyIsNull;
+      final byte[] nextKey;
+      final int nextKeyLength;
+      keyVectorSerializeWrite.setOutput(nextKeyOutput);
+      keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
+      if (keyVectorSerializeWrite.getIsAllNulls()) {
+        nextKeyIsNull = true;
+        nextKey = null;
+        nextKeyLength = 0;
+
+        // We note we encountered a NULL key.
+        haveNullKey = true;
+      } else {
+        nextKeyIsNull = false;
+        nextKey = nextKeyOutput.getData();
+        nextKeyLength = nextKeyOutput.getLength();
+      }
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Logical current key values for multi-key.
+#COMMENT
+#BEGIN_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+    boolean currKeyIsNull;
+    byte[] currentKey;
+    int currentKeyLength;
+
+    final int firstBatchIndex = selected[0];
+    keyVectorSerializeWrite.setOutput(currentKeyOutput);
+    keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex);
+    if (keyVectorSerializeWrite.getIsAllNulls()) {
+      currKeyIsNull = true;
+      currentKey = null;
+      currentKeyLength = 0;
+
+      // We note we encountered a NULL key.
+      haveNullKey = true;
+    } else {
+      currKeyIsNull = false;
+      currentKey = currentKeyOutput.getData();
+      currentKeyLength = currentKeyOutput.getLength();
+    }
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT Physical current key values for multi-key.
+#COMMENT
+#BEGIN_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+    boolean currKeyIsNull;
+    byte[] currentKey;
+    int currentKeyLength;
+
+    keyVectorSerializeWrite.setOutput(currentKeyOutput);
+    keyVectorSerializeWrite.serializeWrite(batch, 0);
+    if (keyVectorSerializeWrite.getIsAllNulls()) {
+      currKeyIsNull = true;
+      currentKey = null;
+      currentKeyLength = 0;
+
+      // We note we encountered a NULL key.
+      haveNullKey = true;
+    } else {
+      currKeyIsNull = false;
+      currentKey = currentKeyOutput.getData();
+      currentKeyLength = currentKeyOutput.getLength();
+    }
+#END_LINES
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnInclude.txt
new file mode 100644
index 0000000..932457d
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnInclude.txt
@@ -0,0 +1,312 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeyCountColumnOperator.
+#COMMENT
+#COMMENT
+  /*
+   * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleNullsKey.
+ * + * (For remaining comments see doNoNullsColumn). + */ + private void doNoNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + + // Start counting after first no NULL key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY +4 + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY +4 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyRepeatingNullColumn). + */ + private void doRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + // Start counting after first key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + // No counting. + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + + // New NULL key. + currKeyIsNull = true; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + // No counting + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + } + + // Handle last key. 
+ if (currKeyIsNull) { + haveNullKey = true; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + } + } + + /* + * Do the non-key-column NO REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyNullsColumn). + */ + private void doNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, ColumnVector nonKeyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + int count = (nonKeyIsNull[firstBatchIndex] ? 0 : 1); + + // Start counting after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + int count = (nonKeyIsNull[0] ? 0 : 1); + + // Start counting after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY +4 + + // New NULL key. + currKeyIsNull = true; + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY +4 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + } + + /* + * batch processing for NULLS key case. + * + * Both NULL and non-NULL keys will have counts for non-key-columns. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any processing for the non-key-column counting. + * + * (See the non-key column case comments for handleNoNullsKey). + * + * In all cases above, when its a NULL key, do NULL entry processing. + * + */ + private void handle(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doNoNullsColumn(batch, inputLogicalSize); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doRepeatingNullColumn(batch, inputLogicalSize); + + } else { + + // Non-NULL repeating non-key column. + doNoNullsColumn(batch, inputLogicalSize); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. 
+ + doNullsColumn(batch, inputLogicalSize, nonKeyColVector); + + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnOperator.txt new file mode 100644 index 0000000..538d84e --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnOperator.txt @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.VectorGroupByHashMultiKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a multi-key COUNT(non-key-column) Native Vectorized GroupBy. + * + * (For more comments, see GroupByHashSingleKeyCountColumnInclude.txt). + */ +public class VectorGroupByHashMultiKeyCountColumnOperator + extends VectorGroupByHashMultiKeyCountTable { + + private static final long serialVersionUID = 1L; + + protected int countColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + protected transient long nullKeyCount; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
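Worth keeping straight across the three COUNT operators generated from these templates: COUNT(*) counts every row of a group, including rows whose group key is NULL; COUNT(key) leaves the NULL-key group's count at 0; and COUNT(non-key-column), the variant above, skips NULL column values via the nonKeyIsNull[batchIndex] ? 0 : 1 increments. A tiny worked illustration of the last rule:

    // For a group whose counted column holds {5, null, 7}:
    //   COUNT(*)   -> 3  (rows are counted regardless of NULLs)
    //   COUNT(col) -> 2  (NULL column values are skipped)
    static long countColumn(Long[] columnValues) {
      long count = 0;
      for (Long v : columnValues) {
        count += (v == null ? 0 : 1);  // mirrors nonKeyIsNull[batchIndex] ? 0 : 1
      }
      return count;
    }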
+ // + + public VectorGroupByHashMultiKeyCountColumnOperator() { + super(); + } + + public VectorGroupByHashMultiKeyCountColumnOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + countColumnNum = countAggregate.getCountColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + +#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + nullKeyCount = 0; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashMultiKeyCommonLines +#INCLUDE GroupByHashCountColumnTableLines + +#INCLUDE GroupByHashMultiKeyCountColumnInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashMultiKeyCountColumnInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + if (batch.selectedInUse) { + handleLogical(batch, inputLogicalSize); + } else { + handlePhysical(batch, inputLogicalSize); + } + } + + /** + * Flush all of the key and count pairs of the multi-key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + if (haveNullKey) { + outputCountForNullMultiKey(nullKeyCount); + } + + doOutputMultiKeyAndCounts(); + } +} diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyInclude.txt new file mode 100644 index 0000000..4bae2e3 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyInclude.txt @@ -0,0 +1,98 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashMultiKeyCountKeyOperator. +#COMMENT +#COMMENT + /* + * batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * For all NULL keys cases we note NULL key exists but leave its count as 0. + * + * Do find/create on each non-NULL key with count count. + */ + private void handle(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count; + if (currKeyIsNull) { + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. 
+ haveNullKey = true; + } else { + count = 1; + } + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + // We don't count NULLs for NULL key. + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY +2 + + // New NULL key. + currKeyIsNull = true; + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. We don't count NULLs for NULL key. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY +2 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + + // Handle last key. + if (!currKeyIsNull) { +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyOperator.txt new file mode 100644 index 0000000..c732afe --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyOperator.txt @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.VectorGroupByHashMultiKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a multi-key COUNT(key-column) Native Vectorized GroupBy. 
+ *
+ * (For more comments see GroupByHashSingleKeyCountKeyOperator.txt).
+ */
+public class VectorGroupByHashMultiKeyCountKeyOperator
+    extends VectorGroupByHashMultiKeyCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient boolean haveNullKey;
+
+#USE_LINES COMMON_KEY_VARIATION_TRANSIENT
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashMultiKeyCountKeyOperator() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeyCountKeyOperator(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashMultiKeyCommonLines
+#INCLUDE GroupByHashCountKeyTableLines
+
+#INCLUDE GroupByHashMultiKeyCountKeyInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical"
+
+#INCLUDE GroupByHashMultiKeyCountKeyInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical"
+
+  @Override
+  protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    if (batch.selectedInUse) {
+      handleLogical(batch, inputLogicalSize);
+    } else {
+      handlePhysical(batch, inputLogicalSize);
+    }
+  }
+
+  /**
+   * Flush all of the key and count pairs of the multi-key hash table to the
+   * output.
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+      outputCountForNullMultiKey(/* nullKeyCount */ 0);
+    }
+
+    doOutputMultiKeyAndCounts();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarInclude.txt
new file mode 100644
index 0000000..c2860f1
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarInclude.txt
@@ -0,0 +1,93 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeyCountStarOperator.
+#COMMENT
+#COMMENT
+  /*
+   * batch processing (i.e. selectedInUse is true since rows were filtered out) for
+   * NULLS key case.
+   *
+   * For all NULL keys we note the NULL key exists AND accumulate their count.
+   *
+   * Do find/create on each non-NULL key with the accumulated count.
+ */ + private void handle(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY +2 + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY +2 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarOperator.txt new file mode 100644 index 0000000..737bb18 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarOperator.txt @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.VectorGroupByHashMultiKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a multi-key COUNT(*) Native Vectorized GroupBy that is lookup on + * a single long using a specialized hash map. + * + * (For more comments see GroupByHashSingleKeyCountStarOperator.txt). + */ +public class VectorGroupByHashMultiKeyCountStarOperator + extends VectorGroupByHashMultiKeyCountTable { + + private static final long serialVersionUID = 1L; + + protected int countColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + protected transient long nullKeyCount; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashMultiKeyCountStarOperator() { + super(); + } + + public VectorGroupByHashMultiKeyCountStarOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + countColumnNum = countAggregate.getCountColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + +#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + nullKeyCount = 0; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. 
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashMultiKeyCommonLines
+#INCLUDE GroupByHashCountStarTableLines
+
+#INCLUDE GroupByHashMultiKeyCountStarInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical"
+
+#INCLUDE GroupByHashMultiKeyCountStarInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical"
+
+  @Override
+  protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    if (batch.selectedInUse) {
+      handleLogical(batch, inputLogicalSize);
+    } else {
+      handlePhysical(batch, inputLogicalSize);
+    }
+  }
+
+  /**
+   * Flush all of the key and count pairs of the multi-key hash table to the
+   * output.
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+      outputCountForNullMultiKey(nullKeyCount);
+    }
+
+    doOutputMultiKeyAndCounts();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionInclude.txt
new file mode 100644
index 0000000..8ba1515
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionInclude.txt
@@ -0,0 +1,70 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeyDuplicateReductionOperator.
+#COMMENT
+#COMMENT
+  protected void handle(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#IF LOGICAL_BATCH_PROCESSING
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY
+
+      if (nextKeyIsNull) {
+
+        if (currKeyIsNull) {
+
+          // NULL key series.
+        } else {
+
+          // Current non-NULL key ended by NULL key.
+#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY +2
+
+          // New NULL key.
+          currKeyIsNull = true;
+        }
+
+      } else {
+
+        if (currKeyIsNull) {
+
+          // Current NULL key ended.
+          currKeyIsNull = false;
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY +2
+#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2
+
+          // Equal key series.
+        } else {
+
+          // Current non-NULL key ended by another non-NULL key.
+#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY +2
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY +2
+        }
+      }
+    }
+
+    // Handle last key.
+    if (!currKeyIsNull) {
+#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY
+    }
+  }
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionOperator.txt
new file mode 100644
index 0000000..9b649a4
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionOperator.txt
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.VectorGroupByHashMultiKeyDuplicateReductionTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a multi-key Native Vectorized GroupBy with no aggregation. + * + * (For more comments, see GroupByHashSingleKeyDuplicateReductionOperator.txt). + */ +public class VectorGroupByHashMultiKeyDuplicateReductionOperator + extends VectorGroupByHashMultiKeyDuplicateReductionTable { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashMultiKeyDuplicateReductionOperator() { + super(); + } + + public VectorGroupByHashMultiKeyDuplicateReductionOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + +#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. 
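Duplicate reduction is GROUP BY with no aggregates, i.e. DISTINCT over the key columns: the hash table only has to record which keys exist, so the per-run operation is create-or-ignore rather than find-and-aggregate. In the long-key tables a key of 0 typically doubles as the empty-slot marker, which is why the DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY lines route it through a haveZeroKey flag instead of storing it. A rough sketch with invented names:

    // Hypothetical create-or-ignore for a long-key duplicate-reduction table;
    // key 0 is tracked out of band because 0 marks an empty slot here.
    final class LongDuplicateReductionSketch {
      private final long[] slots = new long[1024];  // 0 == empty slot
      private boolean haveZeroKey;

      void createOrIgnore(long key, int hashCode) {
        if (key == 0) {
          haveZeroKey = true;
          return;
        }
        int slot = hashCode & (slots.length - 1);
        while (slots[slot] != 0 && slots[slot] != key) {
          slot = (slot + 1) & (slots.length - 1);   // linear probe
        }
        slots[slot] = key;  // creates the entry, or rewrites the identical key
      }
    }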
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashMultiKeyCommonLines
+#INCLUDE GroupByHashDuplicateReductionTableLines
+
+#INCLUDE GroupByHashMultiKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical"
+
+#INCLUDE GroupByHashMultiKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical"
+
+  @Override
+  protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    if (batch.selectedInUse) {
+      handleLogical(batch, inputLogicalSize);
+    } else {
+      handlePhysical(batch, inputLogicalSize);
+    }
+  }
+
+  /**
+   * Flush all of the distinct keys of the multi-key hash table to the
+   * output.
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+
+      // NULL entry to deal with.
+
+      // Is the outputBatch already full?
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      final int keySize = groupByKeyExpressions.length;
+      final int nullBatchIndex = outputBatch.size;
+      for (int i = 0; i < keySize; i++) {
+        ColumnVector keyColumnVector = outputBatch.cols[i];
+        keyColumnVector.isNull[nullBatchIndex] = true;
+        keyColumnVector.noNulls = false;
+      }
+
+      outputBatch.size++;
+    }
+
+    doOutputMultiKeys();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyWordAggrColumnInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyWordAggrColumnInclude.txt
new file mode 100644
index 0000000..a0ae51a
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyWordAggrColumnInclude.txt
@@ -0,0 +1,481 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeyWordColumnOperator.
+#COMMENT
+#COMMENT
+  /*
+   * Do the aggregate column NO REPEATING NO NULLS case for handleNullsKey.
+   *
+   * (For remaining comments see doNoNullsColumn).
+   */
+  private void doNoNullsColumn(VectorizedRowBatch batch,
+      final int inputLogicalSize, aggregateColVector)
+      throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+    [] vector = aggregateColVector.vector;
+#IF DECIMAL64_SUM
+    boolean isDecimal64Overflow = false;
+#ENDIF DECIMAL64_SUM
+
+    // Start aggregating after first no NULL key.
+#IF LOGICAL_BATCH_PROCESSING
+    aggregate = vector[selected[0]];
+
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    aggregate = vector[0];
+
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY
+
+      if (nextKeyIsNull) {
+
+        if (currKeyIsNull) {
+
+          final value = vector[batchIndex];
+
+#USE_LINES WORD_AGGR_COLUMN_VALUE +6
+
+        } else {
+
+          // Current non-NULL key ended.
+
+          // Do appropriate {create init / find and aggregate} hash map entry.
+#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6
+
+          // New NULL key.
+          currKeyIsNull = true;
+
+          // Initialize new NULL key's aggregation.
+          aggregate = vector[batchIndex];
+        }
+
+      } else {
+
+        if (currKeyIsNull) {
+
+          // Current NULL key ended.
+ currKeyIsNull = false; + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NO_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + // Initialize new non-NULL key's aggregation. + aggregate = vector[batchIndex]; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGR_COLUMN_VALUE +6 + } else { + + // Key mismatch. Current non-NULL key ended. + +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + // Initialize new non-NULL key's aggregation. + aggregate = vector[batchIndex]; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NO_NULLS_AGGREGATION +2 + } else { + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +2 + } + } + + /* + * UNDONE... + * Do the aggregate column REPEATING NO NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsColumn). + */ + private void doRepeatingNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; +#IF DECIMAL64_SUM + boolean isDecimal64Overflow = false; +#ENDIF DECIMAL64_SUM + + // Start aggregating after first no NULL key. +#IF LOGICAL_BATCH_PROCESSING + aggregate = vector[selected[0]]; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + aggregate = vector[0]; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGR_COLUMN_VALUE +6 + + } else { + + // Current non-NULL key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6 + + // New NULL key. + currKeyIsNull = true; + + // Initialize new NULL key's aggregation. + aggregate = vector[batchIndex]; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NO_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + // Initialize new non-NULL key's aggregation. + aggregate = vector[batchIndex]; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGR_COLUMN_VALUE +6 + } else { + + // Key mismatch. Current non-NULL key ended. + +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + // Initialize new non-NULL key's aggregation. + aggregate = vector[batchIndex]; + } + } + } + + // Handle last key. 
+ if (currKeyIsNull) { + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NO_NULLS_AGGREGATION +2 + } else { + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +2 + } + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyRepeatingNullColumn). + */ + private void doRepeatingNullColumn(VectorizedRowBatch batch, + final int inputLogicalSize, aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + // Start after first key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + // Current NULL key series continues. + + // No aggregating the NULL value. + + } else { + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +6 + + // New NULL key. + currKeyIsNull = true; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // Remember globally we have a NULL key with a NULL value. +#USE_LINES WORD_AGGR_NULL_KEY_ENDED_ALL_NULLS +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + // Current non-NULL key series continues. + + // No aggregating of our NULL column. + + } else { + + // Key mismatch. Current non-NULL key ended. + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + } + + if (currKeyIsNull) { + + // Remember globally we have a NULL key with a NULL value. +#USE_LINES WORD_AGGR_NULL_KEY_ENDED_ALL_NULLS +2 + } else { + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +2 + } + } + + /* + * Do the aggregate column case NO REPEATING NULLS for handleNullsKey. + */ + private void doNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + boolean[] aggrColIsNull = aggregateColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; +#IF DECIMAL64_SUM + boolean isDecimal64Overflow = false; +#ENDIF DECIMAL64_SUM + +#IF LOGICAL_BATCH_PROCESSING + boolean isAggregateNull = aggrColIsNull[firstBatchIndex]; + aggregate = vector[firstBatchIndex]; // Undefined when isAggregateNull true. + + // Start counting after first key. 
+ for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + boolean isAggregateNull = aggrColIsNull[0]; + aggregate = vector[0]; // Undefined when isAggregateNull true. + + // Start counting after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + // Current NULL key series continues. + + final value = vector[batchIndex]; +#USE_LINES WORD_AGGR_NULLS_COLUMN_VALUE +6 + + } else { + + // Current non-NULL key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +6 + + // New NULL key. + currKeyIsNull = true; + + // Initialize new key's aggregation. + isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + // Initialize new key's aggregation. + isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + // Current non-NULL key series continues. + + final value = vector[batchIndex]; +#USE_LINES WORD_AGGR_NULLS_COLUMN_VALUE +6 + + } else { + + // Key mismatch. Current non-NULL key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + // Initialize new key's aggregation. + isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + } + } + } + + // Handle last key. + if (currKeyIsNull) { + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NULLS_AGGREGATION +2 + } else { + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +2 + } + } + + /* + * batch processing for NULLS key case. + * + * Both NULL and non-NULL keys will have counts for non-key-columns. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any processing for the aggregation. + * + * (See the aggregation column case comments for handleNoNullsKey). + * + * In all cases above, when its a NULL key, do NULL entry processing. + * + */ + private void handle(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + aggregateColVector = + () batch.cols[wordAggregateColumnNum]; + + if (aggregateColVector.isRepeating) { + + // Aggregation: REPEATING, NULLS Possible. + + if (aggregateColVector.noNulls || !aggregateColVector.isNull[0]) { + + doRepeatingNoNullsColumn(batch, inputLogicalSize, aggregateColVector); + + } else { + + doRepeatingNullColumn(batch, inputLogicalSize, aggregateColVector); + + } + } else if (aggregateColVector.noNulls) { + + // Aggregation: NO REPEATING, NO NULLS + + doNoNullsColumn(batch, inputLogicalSize, aggregateColVector); + + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. 
+ + doNullsColumn(batch, inputLogicalSize, aggregateColVector); + + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyWordAggrColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyWordAggrColumnOperator.txt new file mode 100644 index 0000000..0741637 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyWordAggrColumnOperator.txt @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.VectorGroupByHashMultiKeyWordAggrTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +#IF DECIMAL64_SUM +import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +#ENDIF DECIMAL64_SUM + +/* + * Specialized class for doing a multi-key COUNT(non-key-column) Native Vectorized GroupBy. + * + * (For more comments, see GroupByHashMultiKeyWordColumnInclude.txt). + */ +public class + extends VectorGroupByHashMultiKeyWordAggrTable { + + private static final long serialVersionUID = 1L; + + protected int wordAggregateColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + protected transient boolean isNullKeyAggregateNull; + + protected transient nullKeyAggregate; + +#IF DECIMAL64_SUM + protected transient long decimal64SumAbsMax; + + protected transient boolean isNullKeyDecimal64Overflow; + +#ENDIF DECIMAL64_SUM +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
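The DECIMAL64_SUM variation above sums unscaled decimal64 long values directly and records overflow instead of widening: decimal64SumAbsMax (taken from HiveDecimalWritable.getDecimal64AbsMax for the result precision in allocateHashTable below) bounds the magnitude representable at that precision. A simplified sketch of the check -- the concrete bound here is the 18-digit maximum, an assumption for illustration only:

    // Simplified overflow-checked decimal64 sum; a real implementation would use
    // the bound for the actual result precision, not this assumed constant.
    final class Decimal64SumSketch {
      private static final long ABS_MAX = 999_999_999_999_999_999L; // 10^18 - 1, assumed

      long aggregate;
      boolean isDecimal64Overflow;

      void add(long unscaledValue) {
        aggregate += unscaledValue;
        // Unscaled sums exceed the result precision long before a Java long
        // overflows, so the check is against ABS_MAX, not Long.MAX_VALUE.
        if (Math.abs(aggregate) > ABS_MAX) {
          isDecimal64Overflow = true;
        }
      }
    }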
+ // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + wordAggregateColumnNum = wordAggregate.getWordAggregateColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + +#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + isNullKeyAggregateNull = true; + nullKeyAggregate = 0; // Assign some value. +#IF DECIMAL64_SUM + isNullKeyDecimal64Overflow = false; + + DecimalTypeInfo decimal64SumTypeInfo = (DecimalTypeInfo) wordAggregate.getOutputTypeInfo(); + decimal64SumAbsMax = + HiveDecimalWritable.getDecimal64AbsMax( + decimal64SumTypeInfo.getPrecision()); +#ENDIF DECIMAL64_SUM + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashMultiKeyCommonLines +#INCLUDE GroupByHashWordAggrColumnCommonLines +#INCLUDE GroupByHashWordAggrColumnTableLines + +#INCLUDE GroupByHashMultiKeyWordAggrColumnInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashMultiKeyWordAggrColumnInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + if (batch.selectedInUse) { + handleLogical(batch, inputLogicalSize); + } else { + handlePhysical(batch, inputLogicalSize); + } + } + + /** + * Flush all of the key and aggregate pairs of the multi-key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + +#IF DECIMAL64_SUM + if (haveNullKey) { + outputAggregateForNullMultiKey( + isNullKeyAggregateNull || isNullKeyDecimal64Overflow, nullKeyAggregate); + } + + doOutputMultiKeyAndDecimal64SumPairs(); +#ELSE + if (haveNullKey) { + outputAggregateForNullMultiKey(isNullKeyAggregateNull, nullKeyAggregate); + } + + doOutputMultiKeyAndAggregatePairs(); +#ENDIF DECIMAL64_SUM + } +} diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt new file mode 100644 index 0000000..a9df6d1 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt @@ -0,0 +1,42 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to Single Key variations. 
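The directive vocabulary used throughout these templates -- #BEGIN_LINES/#END_LINES to register a named block of lines, #USE_LINES (optionally with a +N re-indent) to splice it, #IF/#ELSE/#ENDIF for variation flags, and #INCLUDE with KEY=value parameters -- is resolved at build time. The generator itself is not in this hunk; the following is only a rough sketch of the splicing step (simplified: no re-indenting, no parameter substitution, and the real generator may differ):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Simplified sketch of #BEGIN_LINES / #USE_LINES splicing.
    public class SnippetSplicerSketch {

      public static List<String> splice(List<String> templateLines) {
        Map<String, List<String>> snippets = new HashMap<>();
        List<String> out = new ArrayList<>();
        String openName = null;

        for (String line : templateLines) {
          if (line.startsWith("#BEGIN_LINES ")) {
            openName = line.substring("#BEGIN_LINES ".length()).trim();
            snippets.put(openName, new ArrayList<>());
          } else if (line.startsWith("#END_LINES")) {
            openName = null;
          } else if (openName != null) {
            snippets.get(openName).add(line);       // Collecting a named block.
          } else if (line.startsWith("#USE_LINES ")) {
            // A reference like "#USE_LINES COMMON_GET_NEXT_KEY +2" would
            // also re-indent the block; that handling is omitted here.
            String name = line.substring("#USE_LINES ".length()).trim().split("\\s+")[0];
            List<String> body = snippets.get(name);
            if (body != null) {
              out.addAll(body);                     // Splice the registered block.
            }
          } else if (!line.startsWith("#COMMENT")) {
            out.add(line);                          // Ordinary template line.
          }
        }
        return out;
      }
    }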
+#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific ColumnVector import code lines. +#COMMENT +#BEGIN_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +#IF STRING_KEY +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +#ENDIF STRING_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific initializeOp code lines. +#COMMENT +#BEGIN_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP +#IF SINGLE_KEY + + keyVectorSerializeWrite = + new VectorSerializeRow( + new BinarySortableSerializeWrite(1)); + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + int[] columnMap = new int[] { groupByKeyExpressions[0].getOutputColumnNum() }; + keyVectorSerializeWrite.init(typeInfos, columnMap); + + currentKeyOutput = new Output(); + nextKeyOutput = new Output(); +#ENDIF SINGLE_KEY +#END_LINES diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnInclude.txt new file mode 100644 index 0000000..90a462b --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnInclude.txt @@ -0,0 +1,527 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeyCountColumnOperator. +#COMMENT +#COMMENT + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleNoNullsKey. + * + * Look for sequences of equal keys and determine their count. + */ + private void doNoNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + + // Start counting after first no NULL key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + + // Handle last key. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNoNullsKey. + * + * Scan for sequences of equal keys. The column count is simply 0 because of all NULL values -- + * but we still must create an entry in the slot table. 
+ */ + private void doNoNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + // No counting. + } else { + + // Current key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + + // Handle last key. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + } + + /* + * Do the NO REPEATING NULLS case for handleNoNullsKey. + * + * Look for sequence of equal keys -- look over at the non-key-column and count non-null rows. + * Even when the non-NULL row count is 0, we still must create an entry in the slot table. + */ + private void doNoNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + int count = (nonKeyIsNull[firstBatchIndex] ? 0 : 1); + + // Start counting after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + int count = (nonKeyIsNull[0] ? 0 : 1); + + // Start counting after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + + // Handle last key. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + + /* + * <OrPhysical> batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * In general, loop over key column and process the keys. Look for sequences of equal keys. And, + * at the same time do any processing for the non-key-column counting. + * + * Here are the cases: + * + * 1) When non-key-column {REPEATING|NO REPEATING} NO NULLS, look for sequences of equal keys + * and determine their count. + * + * 2) When non-key-column REPEATING NULLS, scan for sequences of equal keys. The column count + * is simply 0 because of all NULL values -- but we still must create an entry in the + * slot table. 
+ * + * 3) Otherwise, non-key-column NO REPEATING NULLS, as we are looking for sequence of + * equal keys -- look over at the non-key-column and count non-null rows. Even when the + * non-null row count is 0, we still must create an entry in the slot table. + * + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // NOTE: We don't look at the non-key column values -- we just count. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doNoNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // REPEATING NO NULLS + doNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS. + + doNoNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector); + + } + } + + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyNoNullsColumn). + */ + private void doNullsKeyNoNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + + // Start counting after first no NULL key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyRepeatingNullColumn). 
+ */
+ private void doNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize,
+ keyColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+ int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+ // This loop basically does any needed key creation since the non-key count is 0 because
+ // repeating non-key NULL.
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ // Start scanning after first key.
+#IF LOGICAL_BATCH_PROCESSING
+ for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+ final int batchIndex = selected[logicalIndex];
+#ELSE
+ for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ if (keyColIsNull[batchIndex]) {
+
+ if (currKeyIsNull) {
+
+ // No counting.
+ } else {
+
+ // Current non-NULL key ended.
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS
+
+ // New NULL key.
+ currKeyIsNull = true;
+ }
+
+ } else {
+
+#USE_LINES COMMON_GET_NEXT_KEY
+ if (currKeyIsNull) {
+
+ // Current NULL key ended.
+ currKeyIsNull = false;
+
+ // Remember we had at least one NULL key.
+ haveNullKey = true;
+
+ // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT
+
+ // No counting.
+ } else {
+
+ // Current non-NULL key ended.
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS
+
+ // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+ }
+ }
+ }
+
+ // Handle last key.
+ if (currKeyIsNull) {
+ haveNullKey = true;
+ } else {
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS
+ }
+ }
+
+ /*
+ * Do the non-key-column NO REPEATING NULLS case for handleNullsKey.
+ *
+ * (For remaining comments see doNoNullsKeyNullsColumn).
+ */
+ private void doNullsKeyNullsColumn(VectorizedRowBatch batch,
+ final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector)
+ throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+ int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+ boolean[] nonKeyIsNull = nonKeyColVector.isNull;
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#IF LOGICAL_BATCH_PROCESSING
+ int count = (nonKeyIsNull[firstBatchIndex] ? 0 : 1);
+
+ // Start counting after first key.
+ for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+ final int batchIndex = selected[logicalIndex];
+#ELSE
+ int count = (nonKeyIsNull[0] ? 0 : 1);
+
+ // Start counting after first key.
+ for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ if (keyColIsNull[batchIndex]) {
+
+ if (currKeyIsNull) {
+
+ count += (nonKeyIsNull[batchIndex] ? 0 : 1);
+ } else {
+
+ // Current non-NULL key ended.
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY
+
+ // New NULL key.
+ currKeyIsNull = true;
+ count = (nonKeyIsNull[batchIndex] ? 0 : 1);
+ }
+
+ } else {
+
+#USE_LINES COMMON_GET_NEXT_KEY
+ if (currKeyIsNull) {
+
+ // Current NULL key ended.
+ currKeyIsNull = false;
+
+ haveNullKey = true;
+ nullKeyCount += count;
+
+ // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+
+ count = (nonKeyIsNull[batchIndex] ?
0 : 1); +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + } + + /* + * batch processing for NULLS key case. + * + * Both NULL and non-NULL keys will have counts for non-key-columns. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any processing for the non-key-column counting. + * + * (See the non-key column case comments for handleNoNullsKey). + * + * In all cases above, when its a NULL key, do NULL entry processing. + * + */ + private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // NOTE: We don't look at the non-key column values -- we just count. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // Non-NULL repeating non-key column. + doNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + + doNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector); + + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnOperator.txt new file mode 100644 index 0000000..709a1b0 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnOperator.txt @@ -0,0 +1,308 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.VectorGroupByHashKeyCountTable;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hive.common.util.HashCodeUtil;
+
+#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS
+#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS
+
+/*
+ * Specialized class for doing a single key COUNT(non-key-column) Native Vectorized GroupBy.
+ * That is, the grouping is being done on a single long key and
+ * the counting is for another ("non-key") column (which can be any data type).
+ *
+ * We make a single pass. We loop over the key column and process the keys. We look for
+ * sequences of NULL keys or equal keys. And, at the same time do any processing for the
+ * non-key-column counting.
+ *
+ * NOTE: Both NULL and non-NULL keys have counts for non-key-columns. So, after counting the
+ * non-NULL fields for the non-key-column, we always do a hash table find/create even when the count
+ * is 0 since all those keys must be part of the output result.
+
+ // A key will get created even when there are no non-NULL column values. Count includes 0.
+
+ findOrCreateLongKeyZeroCount(
+ key,
+ longKeySeries.currentHashCode,
+ nonNullCount);
+
+ */
+public class
+ extends VectorGroupByHashKeyCountTable {
+
+ private static final long serialVersionUID = 1L;
+
+ // Non-transient members initialized by the constructor. They cannot be final due to Kryo.
+
+ protected int countColumnNum;
+
+ // The above members are initialized by the constructor and must not be
+ // transient.
+ //---------------------------------------------------------------------------
+
+ protected transient boolean haveNullKey;
+
+ protected transient long nullKeyCount;
+
+#USE_LINES COMMON_KEY_VARIATION_TRANSIENT
+ //---------------------------------------------------------------------------
+ // Pass-thru constructors.
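Restated in plain Java, the single pass described in the class comment above looks roughly like the sketch below. It is illustrative only: a HashMap stands in for the specialized slot table, and the selected[] mapping and isRepeating fast path are omitted.

    import java.util.HashMap;
    import java.util.Map;

    // Reference sketch of single-pass COUNT(nonKey) GROUP BY key with
    // series detection: the table is touched once per series of equal
    // keys, and an entry is created even when a series' count is 0.
    public class CountColumnSeriesSketch {

      public static void run(Long[] keys, Long[] nonKey) {
        Map<Long, Long> table = new HashMap<>();
        boolean haveNullKey = false;
        long nullKeyCount = 0;

        Long currKey = keys[0];
        long count = (nonKey[0] == null) ? 0 : 1;

        for (int i = 1; i <= keys.length; i++) {
          boolean seriesContinues =
              i < keys.length
                  && ((keys[i] == null && currKey == null)
                      || (keys[i] != null && keys[i].equals(currKey)));
          if (seriesContinues) {
            count += (nonKey[i] == null) ? 0 : 1;    // Series continues.
            continue;
          }
          // Current series ended (or input exhausted): one table touch.
          if (currKey == null) {
            haveNullKey = true;                      // NULL-key pseudo-entry.
            nullKeyCount += count;
          } else {
            table.merge(currKey, count, Long::sum);  // Created even when count is 0.
          }
          if (i < keys.length) {                     // Start the next series.
            currKey = keys[i];
            count = (nonKey[i] == null) ? 0 : 1;
          }
        }
        System.out.println(table + (haveNullKey ? " NULL=" + nullKeyCount : ""));
      }

      public static void main(String[] args) {
        run(new Long[] { 1L, 1L, null, null, 2L },
            new Long[] { 9L, null, 7L, null, null });
        // Prints {1=1, 2=0} NULL=1
      }
    }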
+ // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + countColumnNum = countAggregate.getCountColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + nullKeyCount = 0; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashCountColumnTableLines +#COMMENT +#COMMENT=========================================================================================== +#COMMENT + /* + * Repeating key case -- it is either ALL NULL keys or ALL same non-NULL keys. + * + * First, we determine the number of non-NULL values in the non-key column. + * Then, whether ALL NULL keys or ALL same non-NULL keys, we create the key if necessary and + * include the new count. + * + * A NULL key is not in the slot table. It is separately represented by members haveNullKey + * and nullKeyCount. + * + */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) + throws HiveException, IOException { + + /* + * First, determine the count of the non-key column for the whole batch which is covered by the + * repeating key. + */ + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + int nonKeyNonNullCount; + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: [REPEATING,] NO NULLS + nonKeyNonNullCount = inputLogicalSize; + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible + nonKeyNonNullCount = (nonKeyColVector.isNull[0] ? 0 : inputLogicalSize); + + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + nonKeyNonNullCount = 0; + if (batch.selectedInUse) { + + int[] selected = batch.selected; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + if (!nonKeyIsNull[batchIndex]) { + nonKeyNonNullCount++; + } + } + } else { + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + if (!nonKeyIsNull[batchIndex]) { + nonKeyNonNullCount++; + } + } + } + } + + /* + * Finally, use the non-key non-NULL count for our repeated non-NULL or NULL keys. + */ + if (keyColVector.noNulls || !keyColVector.isNull[0]) { + + // Non-NULL key. 
+#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongKeyZeroCount( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + nonKeyNonNullCount); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKeyCount( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + nonKeyNonNullCount); +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKeyCount( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + nonKeyNonNullCount); +#ENDIF SINGLE_KEY + } else { + + // All NULL keys. Since we are counting a non-Key column, we must count it under the NULL + // pseudo-entry. + haveNullKey = true; + nullKeyCount += nonKeyNonNullCount; + + } + } + +#INCLUDE GroupByHashSingleKeyCountColumnInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyCountColumnInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. 
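The handleRepeatingKey path above, reached through the isRepeating short-circuit in doMainLoop, turns the whole batch into a single find/create. A toy illustration of why that matters (plain HashMap; names here are local to the sketch, not patch code):

    import java.util.HashMap;
    import java.util.Map;

    // Toy illustration of the repeating-key short-circuit: one hash-table
    // update covers every row of the batch.
    public class RepeatingKeySketch {

      static void addRepeatingBatch(Map<Long, Long> table, long repeatedKey, int batchSize) {
        table.merge(repeatedKey, (long) batchSize, Long::sum);  // One probe for all rows.
      }

      public static void main(String[] args) {
        Map<Long, Long> table = new HashMap<>();
        addRepeatingBatch(table, 42L, 1024);  // e.g. VectorizedRowBatch.DEFAULT_SIZE
        addRepeatingBatch(table, 42L, 1024);
        System.out.println(table);            // {42=2048}
      }
    }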
+
+ keyColumnVector = () outputBatch.cols[0];
+
+ LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[1];
+
+ if (haveNullKey) {
+ outputCountForNullSingleKey(
+ keyColumnVector, countColumnVector, nullKeyCount);
+ }
+
+#IF LONG_KEY
+ outputLongZeroCountKeyAndCountPairs(
+ keyColumnVector, countColumnVector);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+ doOutputStringKeyAndCountPairs(
+ keyColumnVector, countColumnVector);
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+ doOutputSingleKeyAndCountPairs(
+ keyColumnVector, countColumnVector);
+#ENDIF SINGLE_KEY
+ }
+}
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyInclude.txt
new file mode 100644
index 0000000..cad9bf9
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyInclude.txt
@@ -0,0 +1,147 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashSingleKeyCountKeyOperator.
+#COMMENT
+#COMMENT
+ /*
+ * batch processing (i.e. selectedInUse is true since rows were filtered out) for
+ * NO NULLS key case.
+ *
+ * Do find/create on each key with its count.
+ */
+ private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize,
+ keyColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+ int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ int count = 1;
+
+#IF LOGICAL_BATCH_PROCESSING
+ for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+ final int batchIndex = selected[logicalIndex];
+#ELSE
+ for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_GET_NEXT_KEY
+#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT
+
+ count++;
+ } else {
+
+ // Current key ended.
+#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY
+
+ // New current key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+
+ count = 1;
+ }
+ }
+
+ // Handle last key.
+#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY
+ }
+
+ /*
+ * batch processing (i.e. selectedInUse is true since rows were filtered out) for
+ * NULLS key case.
+ *
+ * For all NULL-key cases we note that a NULL key exists but leave its count as 0.
+ *
+ * Do find/create on each non-NULL key with its count.
+ */
+ private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize,
+ keyColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+ int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ int count;
+ if (currKeyIsNull) {
+ count = 0;
+
+ // We note we encountered a NULL key. But there will be no count for it -- just NULL.
+ haveNullKey = true; + } else { + count = 1; + } + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // We don't count NULLs for NULL key. + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY +2 + + // New NULL key. + currKeyIsNull = true; + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. We don't count NULLs for NULL key. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY +2 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + + // Handle last key. + if (!currKeyIsNull) { +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY + } + } diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyOperator.txt new file mode 100644 index 0000000..1d705ed --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyOperator.txt @@ -0,0 +1,241 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.VectorGroupByHashKeyCountTable; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a single key COUNT(key-column) Native Vectorized GroupBy. That is, + * the grouping is being done on one long key and we are counting it. + * + * The NULL key is not represented in the hash table. We handle them as a special case. So, + * the find/create call for non-NULL keys looks like this: + + findOrCreateLongKeyNonZeroCount( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); + + */ +public class + extends VectorGroupByHashKeyCountTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashCountKeyTableLines +#COMMENT +#COMMENT=========================================================================================== +#COMMENT + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For all NULL keys case we note NULL key exists but leave its count as 0. 
+ */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongKeyNonZeroCount( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKeyCount( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + inputLogicalSize); +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKeyCount( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + inputLogicalSize); +#ENDIF SINGLE_KEY + } else { + + // We note we encountered a repeating NULL key. But there will be no count for it -- + // just NULL. + haveNullKey = true; + } + } + +#INCLUDE GroupByHashSingleKeyCountKeyInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyCountKeyInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + keyColumnVector = () outputBatch.cols[0]; + + LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[1]; + + if (haveNullKey) { + + // COUNT(column) does not maintain a count for NULLs and since we are processing the key + // our count is always 0. 
+ outputCountForNullSingleKey(
+ keyColumnVector, countColumnVector, /* nullKeyCount */ 0);
+ }
+
+#IF LONG_KEY
+ outputLongNonZeroKeyAndCountPairs(
+ keyColumnVector, countColumnVector);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+ doOutputStringKeyAndCountPairs(
+ keyColumnVector, countColumnVector);
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+ doOutputSingleKeyAndCountPairs(
+ keyColumnVector, countColumnVector);
+#ENDIF SINGLE_KEY
+ }
+}
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarInclude.txt
new file mode 100644
index 0000000..ee4d418
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarInclude.txt
@@ -0,0 +1,142 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashSingleKeyCountStarOperator.
+#COMMENT
+#COMMENT
+ /*
+ * batch processing (i.e. selectedInUse is true since rows were filtered out) for
+ * NO NULLS key case.
+ *
+ * Do find/create on each key with its count.
+ */
+ private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize,
+ keyColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+ int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ int count = 1;
+
+#IF LOGICAL_BATCH_PROCESSING
+ for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+ final int batchIndex = selected[logicalIndex];
+#ELSE
+ for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_GET_NEXT_KEY
+#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT
+
+ count++;
+ } else {
+
+ // Current key ended.
+#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY
+
+ // New current key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+
+ count = 1;
+ }
+ }
+
+ // Handle last key.
+#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY
+ }
+
+ /*
+ * batch processing (i.e. selectedInUse is true since rows were filtered out) for
+ * NULLS key case.
+ *
+ * For NULL keys we note that a NULL key exists AND accumulate their count.
+ *
+ * Do find/create on each non-NULL key with its count.
+ */
+ private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize,
+ keyColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+ int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ int count = 1;
+
+#IF LOGICAL_BATCH_PROCESSING
+ for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+ final int batchIndex = selected[logicalIndex];
+#ELSE
+ for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ if (keyColIsNull[batchIndex]) {
+
+ if (currKeyIsNull) {
+
+ count++;
+ } else {
+
+ // Current non-NULL key ended.
+#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY +2
+
+ // New NULL key.
+ currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY +2 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarOperator.txt new file mode 100644 index 0000000..5d1ca5a --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarOperator.txt @@ -0,0 +1,240 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.VectorGroupByHashKeyCountTable; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a single key COUNT(*) Native Vectorized GroupBy that is lookup on + * a single long using a specialized hash map. + * + Count Star + + NULL key has separate counter. + + findOrCreateLongKeyNonZeroCount( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); + + */ +public class + extends VectorGroupByHashKeyCountTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. 
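Compared with the COUNT(key-column) operator above, the difference shows up only in the NULL group: COUNT(*) counts NULL-key rows (hence the nullKeyCount member here), while COUNT(key) always reports 0 for the NULL group. A plain-Java reference of that contrast (illustrative, not patch code):

    import java.util.HashMap;
    import java.util.Map;

    // NULL-group contrast between COUNT(*) and COUNT(key) when grouping
    // by "key".
    public class NullGroupContrast {

      public static void main(String[] args) {
        Long[] keys = { 7L, null, 7L, null, 3L };

        Map<Long, Long> counts = new HashMap<>();
        long countStarNullGroup = 0;  // COUNT(*): NULL-key rows are counted.
        long countKeyNullGroup = 0;   // COUNT(key): never counts NULLs, stays 0.

        for (Long key : keys) {
          if (key == null) {
            countStarNullGroup++;
          } else {
            counts.merge(key, 1L, Long::sum);  // Same for both aggregates.
          }
        }
        // Non-NULL groups: {3=1, 7=2}. NULL group: COUNT(*)=2, COUNT(key)=0.
        System.out.println(counts + " COUNT(*)=" + countStarNullGroup
            + " COUNT(key)=" + countKeyNullGroup);
      }
    }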
+ //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + protected transient long nullKeyCount; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + nullKeyCount = 0; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashCountStarTableLines +#COMMENT + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For all NULL keys case we note NULL key exists AND count it. + */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongKeyNonZeroCount( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKeyCount( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + inputLogicalSize); +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKeyCount( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + inputLogicalSize); +#ENDIF SINGLE_KEY + } else { + + // We note we encountered a repeating NULL key. + haveNullKey = true; + nullKeyCount += inputLogicalSize; + } + } + +#INCLUDE GroupByHashSingleKeyCountStarInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyCountStarInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. 
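Past the repeating-key short-circuit, doMainLoop (below) splits on batch.selectedInUse, and the generated Logical/Physical method pairs differ only in this indexing rule -- sketched standalone here (not patch code):

    // With selectedInUse, row i of the batch is batch.selected[i]; without
    // it, logical and physical indexes coincide.
    public class SelectedIndexSketch {

      static long sumColumn(long[] vector, int size, boolean selectedInUse, int[] selected) {
        long sum = 0;
        for (int logical = 0; logical < size; logical++) {
          final int batchIndex = selectedInUse ? selected[logical] : logical;
          sum += vector[batchIndex];
        }
        return sum;
      }

      public static void main(String[] args) {
        long[] vector = { 10, 20, 30, 40 };
        // A filter kept only rows 0 and 3 of the batch.
        System.out.println(sumColumn(vector, 2, true, new int[] { 0, 3 }));  // 50
        System.out.println(sumColumn(vector, 4, false, null));               // 100
      }
    }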
+ if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + keyColumnVector = () outputBatch.cols[0]; + + LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[1]; + + if (haveNullKey) { + outputCountForNullSingleKey( + keyColumnVector, countColumnVector, nullKeyCount); + } + +#IF LONG_KEY + outputLongNonZeroKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF STRING_KEY +#IF SINGLE_KEY + doOutputSingleKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF SINGLE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionInclude.txt new file mode 100644 index 0000000..f6c48f1 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionInclude.txt @@ -0,0 +1,132 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeyDuplicateReductionOperator. +#COMMENT +#COMMENT + /* + * batch processing for NO NULLS key case. + * + * Do find/create on each key. + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current key ended. +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + + // Handle last key. +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + } + + /* + * batch processing for NULLS key case. + * + * For all NULL keys cases we note NULL key exists since we don't represent it in the slot table. 
+ * + * Do find/create on each non-NULL key. + */ + private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + if (currKeyIsNull) { + + // We note we encountered a NULL key. + haveNullKey = true; + } + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // NULL key series. + } else { + + // Current non-NULL key ended by NULL key. +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + + // New NULL key. + currKeyIsNull = true; + + // We note we encountered a NULL key. + haveNullKey = true; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current non-NULL key ended by another non-NULL key. +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + } + + // Handle last key. + if (!currKeyIsNull) { +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt new file mode 100644 index 0000000..fd56dea --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.VectorGroupByHashKeyDuplicateReductionTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a single key Native Vectorized GroupBy with no aggregation. + * + * It is used on a single key for duplicate reduction. + * + * Final duplicate elimination must be done in reduce-shuffle and a reducer since with hash table + * overflow some duplicates can slip through. And, of course, other vertices may contribute + * the same keys. + */ +public class + extends VectorGroupByHashKeyDuplicateReductionTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashDuplicateReductionTableLines + + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For the all NULL or all 0 keys case we note NULL/0 key exists. Otherwise, we do the + * find/create. 
+ */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + if (repeatingKey == 0) { + + // We don't store 0 in the slot table so it can be used to indicate an empty slot. + haveZeroKey = true; + } else { + createOrIgnoreLongDuplicateReductionKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + createOrIgnoreBytesDuplicateReductionKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength)); +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + createOrIgnoreBytesDuplicateReductionKey( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength)); +#ENDIF SINGLE_KEY + } else { + + // We note we encountered a repeating NULL key. + haveNullKey = true; + } + } + +#INCLUDE GroupByHashSingleKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + keyColumnVector = () outputBatch.cols[0]; + + if (haveNullKey) { + + // NULL entry to deal with. + + // Is the outputBatch already full? 
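One detail worth calling out in handleRepeatingKey above: a repeating key value of 0 is routed to a haveZeroKey flag instead of the table because, per the comment, 0 marks an empty slot. A minimal open-addressing sketch (a hypothetical layout, not the actual VectorGroupByHashKeyDuplicateReductionTable internals; resizing and flushing omitted) makes the constraint visible:

public class LongSlotTableSketch {
  private final long[] slots = new long[1 << 10];  // 0 means "empty slot"
  private final int mask = slots.length - 1;
  private boolean haveZeroKey;                     // 0 cannot live in slots[]

  void createOrIgnore(long key, int hashCode) {
    if (key == 0) {          // indistinguishable from an empty slot, so flag it
      haveZeroKey = true;
      return;
    }
    int slot = hashCode & mask;
    while (true) {
      long existing = slots[slot];
      if (existing == 0) {   // empty: create
        slots[slot] = key;
        return;
      }
      if (existing == key) { // already present: ignore
        return;
      }
      slot = (slot + 1) & mask;  // linear probe
    }
  }
}

Because an all-zero slots[] array is "all empty" for free, allocation stays cheap, and the two unrepresentable keys (0 and NULL) are carried as booleans and re-injected when the table is flushed to the output batch.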
+ if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + outputBatch.size++; + } + +#IF LONG_KEY + doOutputLongKeys(keyColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeys(keyColumnVector); +#ENDIF STRING_KEY +#IF SINGLE_KEY + doOutputSerializeKeys(keyColumnVector); +#ENDIF SINGLE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordAggrColumnInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordAggrColumnInclude.txt new file mode 100644 index 0000000..ca42e72 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordAggrColumnInclude.txt @@ -0,0 +1,785 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeyWordColumnOperator. +#COMMENT +#COMMENT + /* + * Do the aggregate NO NULLS column case for handleNoNullsKey. + */ + private void doNoNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; +#IF DECIMAL64_SUM + boolean isDecimal64Overflow = false; +#ENDIF DECIMAL64_SUM + +#IF LOGICAL_BATCH_PROCESSING + aggregate = vector[selected[0]]; + + // Start counting after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + aggregate = vector[0]; + + // Start counting after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + final value = vector[batchIndex]; + + // Next key. + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + +#USE_LINES WORD_AGGR_COLUMN_VALUE +4 + } else { + + // Current key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +4 + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + // Initialize new key's aggregation. + aggregate = value; + } + } + + // Handle last key. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION + } + + /* + * Do the aggregate column REPEATING NO NULLS case for handleNoNullsKey. 
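The aggregate-column include that starts here applies the same run structure as the duplicate-reduction include, but carries a run-local accumulator and performs one find-or-create per run. A hedged sketch of the no-NULLs SUM case, with java.util.HashMap standing in for the native find-or-create table and all names hypothetical:

import java.util.HashMap;

public class RunSumSketch {
  private final HashMap<Long, Long> sums = new HashMap<>();  // stand-in table

  // Assumes size >= 1 and no NULLs in either column.
  void processBatch(long[] keys, long[] values, int size) {
    long currKey = keys[0];
    long aggregate = values[0];              // run-local accumulator
    for (int i = 1; i < size; i++) {
      if (keys[i] == currKey) {
        aggregate += values[i];              // inside the run: no table access
      } else {
        sums.merge(currKey, aggregate, Long::sum);  // one find-or-create per run
        currKey = keys[i];
        aggregate = values[i];
      }
    }
    sums.merge(currKey, aggregate, Long::sum);      // flush the last run
  }
}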
+ */ + private void doNoNullsKeyRepeatingNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + final repeatedValue = aggregateColVector.vector[0]; + aggregate = 0; +#IF DECIMAL64_SUM + boolean isDecimal64Overflow = false; +#ENDIF DECIMAL64_SUM + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + // No aggregation per key but do maintain the count. + count++; + + } else { + + // Current key ended. + +#USE_LINES WORD_AGGR_REPEATED_COLUMN_VALUE +4 + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +4 + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + + // Handle last key. + +#USE_LINES WORD_AGGR_REPEATED_COLUMN_VALUE + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION + } + + /* + * Do the aggregate column REPEATING NULLS case for handleNoNullsKey. + */ + private void doNoNullsKeyRepeatingNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + // No aggregating -- value is NULL. + + } else { + + // Current key ended. + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +4 + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + + // Handle last key. + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY + } + + /* + * Do the aggregate column NULLS case for handleNoNullsKey. 
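When the aggregate column itself is isRepeating, the loop above degenerates to counting: every row carries the same value, so a run of n equal keys contributes just the value for MIN/MAX and value * n for SUM (the WORD_AGGR_REPEATED_COLUMN_VALUE snippet). A sketch of the SUM variant under the same stand-in-table assumption:

public class RepeatingColumnSumSketch {
  private final java.util.HashMap<Long, Long> sums = new java.util.HashMap<>();

  // The aggregate column is isRepeating: every row carries repeatedValue,
  // so a run of count equal keys contributes repeatedValue * count (SUM case).
  // Assumes size >= 1.
  void processBatch(long[] keys, long repeatedValue, int size) {
    long currKey = keys[0];
    int count = 1;
    for (int i = 1; i < size; i++) {
      if (keys[i] == currKey) {
        count++;                             // no per-row arithmetic needed
      } else {
        sums.merge(currKey, repeatedValue * count, Long::sum);
        currKey = keys[i];
        count = 1;
      }
    }
    sums.merge(currKey, repeatedValue * count, Long::sum);
  }
}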
+ */ + private void doNoNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + boolean[] aggrColIsNull = aggregateColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; +#IF DECIMAL64_SUM + boolean isDecimal64Overflow = false; +#ENDIF DECIMAL64_SUM + +#IF LOGICAL_BATCH_PROCESSING + boolean isAggregateNull = aggrColIsNull[firstBatchIndex]; + aggregate = vector[firstBatchIndex]; // Undefined when isAggregateNull true. + + // Start counting after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + boolean isAggregateNull = aggrColIsNull[0]; + aggregate = vector[0]; // Undefined when isAggregateNull true. + + // Start aggregating after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + // Next key. + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + if (!aggrColIsNull[batchIndex]) { + + final value = vector[batchIndex]; +#USE_LINES WORD_AGGR_NULLS_COLUMN_VALUE +6 + } + } else { + + // Current key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +4 + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + // Initialize new key's aggregation. + isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + + } + } + + // Handle last key. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION + } + + /* + * <OrPhysical> batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + aggregateColVector = + () batch.cols[wordAggregateColumnNum]; + + if (aggregateColVector.isRepeating) { + + // Aggregation: REPEATING, NULLS Possible. + + if (aggregateColVector.noNulls || !aggregateColVector.isNull[0]) { + + doNoNullsKeyRepeatingNoNullsColumn( + batch, inputLogicalSize, keyColVector, aggregateColVector); + } else { + + doNoNullsKeyRepeatingNullsColumn( + batch, inputLogicalSize, keyColVector); + + } + } else if (aggregateColVector.noNulls) { + + // Aggregation: NO REPEATING, NO NULLS + + doNoNullsKeyNoNullsColumn( + batch, inputLogicalSize, keyColVector, aggregateColVector); + + } else { + + // Aggregation: NOT REPEATING, NULLS. + + doNoNullsKeyNullsColumn( + batch, inputLogicalSize, keyColVector, aggregateColVector); + + } + } + + /* + * Do the aggregate column NO NULLS case for handleNullsKey. 
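doNoNullsKeyNullsColumn above adds one twist to the run-local accumulator: when the aggregate column has NULLs, the accumulator starts out NULL (isAggregateNull) and is initialized by the run's first non-NULL value; a run whose values are all NULL still registers the key as a NULL entry so it appears in the result. A hedged sketch of that pattern for SUM (stand-in collections, hypothetical names):

public class NullableRunSumSketch {
  private final java.util.HashMap<Long, Long> sums = new java.util.HashMap<>();
  private final java.util.HashSet<Long> nullOnlyKeys = new java.util.HashSet<>();

  // Assumes size >= 1 and no NULL keys (only the values may be NULL).
  void processBatch(long[] keys, long[] values, boolean[] valueIsNull, int size) {
    long currKey = keys[0];
    boolean isAggregateNull = valueIsNull[0];
    long aggregate = values[0];              // undefined while isAggregateNull
    for (int i = 1; i < size; i++) {
      if (keys[i] == currKey) {
        if (!valueIsNull[i]) {
          if (isAggregateNull) {             // first non-NULL value of the run
            isAggregateNull = false;
            aggregate = values[i];
          } else {
            aggregate += values[i];
          }
        }
      } else {
        flushRun(currKey, isAggregateNull, aggregate);
        currKey = keys[i];
        isAggregateNull = valueIsNull[i];
        aggregate = values[i];
      }
    }
    flushRun(currKey, isAggregateNull, aggregate);
  }

  private void flushRun(long key, boolean isNull, long aggregate) {
    if (isNull) {
      // The key must still appear in the output even if all values were NULL.
      if (!sums.containsKey(key)) {
        nullOnlyKeys.add(key);               // create-or-ignore NULL entry
      }
    } else {
      nullOnlyKeys.remove(key);
      sums.merge(key, aggregate, Long::sum); // find-or-create with aggregation
    }
  }
}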
+ */ + private void doNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; +#IF DECIMAL64_SUM + boolean isDecimal64Overflow = false; +#ENDIF DECIMAL64_SUM + +#IF LOGICAL_BATCH_PROCESSING + aggregate = vector[selected[0]]; + + // Start aggregating after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + aggregate = vector[0]; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + // Next key is NULL. + + if (currKeyIsNull) { + + // Current NULL key series continues. + + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGR_COLUMN_VALUE +6 + + } else { + + // Current non-NULL key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6 + + // New NULL key. + currKeyIsNull = true; + + // Initialize new NULL key's aggregation. + aggregate = vector[batchIndex]; + } + + } else { + + // Non-NULL next key. + +#USE_LINES COMMON_GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NO_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + // Initialize new non-NULL key's aggregation. + aggregate = vector[batchIndex]; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGR_COLUMN_VALUE +6 + } else { + + // Key mismatch. Current non-NULL key ended. + +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + // Initialize new non-NULL key's aggregation. + aggregate = vector[batchIndex]; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NO_NULLS_AGGREGATION +2 + } else { + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +2 + } + } + + /* + * Do the aggregate column REPEATING NO NULLS case for handleNullsKey. + */ + private void doNullsKeyRepeatingNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + final repeatedValue = aggregateColVector.vector[0]; + aggregate = 0; +#IF DECIMAL64_SUM + boolean isDecimal64Overflow = false; +#ENDIF DECIMAL64_SUM + int count = 1; + + // Start after first key. 
+#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // No aggregation per key but do maintain the count. + count++; + + } else { + + // Current non-NULL key ended. + +#USE_LINES WORD_AGGR_REPEATED_COLUMN_VALUE +6 + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6 + + // New NULL key. + currKeyIsNull = true; + + count = 1; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + +#USE_LINES WORD_AGGR_REPEATED_COLUMN_VALUE +6 + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NO_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + // No aggregation per key but do maintain the count. + count++; + + } else { + + // Key mismatch. Current non-NULL key ended. + +#USE_LINES WORD_AGGR_REPEATED_COLUMN_VALUE +6 + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + + // Handle last key. + +#USE_LINES WORD_AGGR_REPEATED_COLUMN_VALUE + + if (currKeyIsNull) { + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NO_NULLS_AGGREGATION +2 + } else { + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +2 + } + } + + /* + * Do the aggregate column REPEATING NULLS case for handleNullsKey. + */ + private void doNullsKeyRepeatingNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + // Start after first key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // Current NULL key series continues. + + // No aggregating the NULL value. + + } else { + + // Current non-NULL key ended. + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +6 + + // New NULL key. + currKeyIsNull = true; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // Remember globally we have a NULL key with a NULL value. +#USE_LINES WORD_AGGR_NULL_KEY_ENDED_ALL_NULLS +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + // Current non-NULL key series continues. 
+ + // No aggregating of our NULL column. + + } else { + + // Key mismatch. Current non-NULL key ended. + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + } + } + } + + if (currKeyIsNull) { + + // Remember globally we have a NULL key with a NULL value. +#USE_LINES WORD_AGGR_NULL_KEY_ENDED_ALL_NULLS +2 + } else { + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +2 + } + } + + /* + * Do the aggregate column NO REPEATING NULLS case for handleNullsKey. + */ + private void doNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + boolean[] aggrColIsNull = aggregateColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; +#IF DECIMAL64_SUM + boolean isDecimal64Overflow = false; +#ENDIF DECIMAL64_SUM + +#IF LOGICAL_BATCH_PROCESSING + boolean isAggregateNull = aggrColIsNull[firstBatchIndex]; + aggregate = vector[firstBatchIndex]; // Undefined when isAggregateNull true. + + // Start aggregating after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + boolean isAggregateNull = aggrColIsNull[0]; + aggregate = vector[0]; // Undefined when isAggregateNull true. + + // Start counting after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + // Next key is NULL. + + if (currKeyIsNull) { + + // Current NULL key series continues. + + final value = vector[batchIndex]; +#USE_LINES WORD_AGGR_NULLS_COLUMN_VALUE +6 + + } else { + + // Current non-NULL key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +6 + + // New NULL key. + currKeyIsNull = true; + + // Initialize new key's aggregation. + isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + } + + } else { + + // Non-NULL next key. + +#USE_LINES COMMON_GET_NEXT_KEY +2 + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NULLS_AGGREGATION +6 + + // New non-NULL current key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + // Initialize new key's aggregation. + isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + // Current non-NULL key series continues. + + final value = vector[batchIndex]; +#USE_LINES WORD_AGGR_NULLS_COLUMN_VALUE +6 + + } else { + + // Key mismatch. Current non-NULL key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + // Initialize new key's aggregation. 
+ isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + } + } + } + + // Handle last key. + if (currKeyIsNull) { + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NULLS_AGGREGATION +2 + } else { + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +2 + } + } + + /* + * batch processing for NULLS key case. + * + * Both NULL and non-NULL keys will have aggregation work. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any aggregation work. + * + * (See the non-key column case comments for handleNoNullsKey). + * + * In all cases above, when its a NULL key, do NULL pseudo-entry processing. + * + */ + private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + aggregateColVector = + () batch.cols[wordAggregateColumnNum]; + + if (aggregateColVector.isRepeating) { + + // Aggregation: REPEATING, NULLS Possible. + + if (aggregateColVector.noNulls || !aggregateColVector.isNull[0]) { + + doNullsKeyRepeatingNoNullsColumn( + batch, inputLogicalSize, keyColVector, aggregateColVector); + } else { + + doNullsKeyRepeatingNullsColumn( + batch, inputLogicalSize, keyColVector); + } + } else if (aggregateColVector.noNulls) { + + // Aggregation: NO REPEATING, NO NULLS + + doNullsKeyNoNullsColumn( + batch, inputLogicalSize, keyColVector, aggregateColVector); + + } else { + + // Aggregation: NO REPEATING, NULLS Possible. + + doNullsKeyNullsColumn( + batch, inputLogicalSize, keyColVector, aggregateColVector); + + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordAggrColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordAggrColumnOperator.txt new file mode 100644 index 0000000..59fe1dc --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordAggrColumnOperator.txt @@ -0,0 +1,460 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
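A note on the operator file that begins here: like the 0 key in the duplicate-reduction case, a NULL key cannot live in the slot table, so its aggregate is carried in the transient fields haveNullKey / isNullKeyAggregateNull / nullKeyAggregate and merged into the output during flush. A minimal sketch of the merge rule the WORD_NULL_KEY_ENDED_NULLS_AGGREGATION snippet implements (SUM case shown; field names mirror the template but the class itself is hypothetical):

public class NullKeySideAggregateSketch {
  private boolean haveNullKey;             // saw at least one NULL key
  private boolean isNullKeyAggregateNull;  // its aggregate is still NULL
  private long nullKeyAggregate;           // valid only when the flag above is false

  // Called each time a run of NULL keys ends, with that run's aggregate.
  void aggregateNullKey(boolean runAggregateIsNull, long runAggregate) {
    if (!haveNullKey) {
      haveNullKey = true;
      isNullKeyAggregateNull = runAggregateIsNull;
      nullKeyAggregate = runAggregate;     // undefined if runAggregateIsNull
    } else if (!runAggregateIsNull) {
      if (isNullKeyAggregateNull) {
        isNullKeyAggregateNull = false;    // first non-NULL aggregate for NULL key
        nullKeyAggregate = runAggregate;
      } else {
        nullKeyAggregate += runAggregate;  // SUM; MIN/MAX would compare instead
      }
    }
  }
}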
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.VectorGroupByHashKeyWordAggrTable; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +#IF DECIMAL64_SUM +import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +#ENDIF DECIMAL64_SUM + +/* + * Specialized class for doing a single key {MAX|MIN|SUM} aggregate Native Vectorized GroupBy. + * + * UNDONE + * That is, the grouping is being done on a single long key and + * the counting is for a another ("non-key") column (which can be any data type). + * + * We make a single pass. We loop over key column and process the keys. We look for + * sequences of NULL keys or equal keys. And, at the same time do any processing for the + * non-key-column counting. + * + * NOTE: Both NULL and non-NULL keys have counts for non-key-columns. So, after counting the + * non-NULL fields for the non-key-column, we always do a hash table find/create even when the count + * is 0 since the all those keys must be part of the output result. + + // A key will get created even when there are no non-NULL column values. Count includes 0. + + findOrCreateLongKeyZeroCount( + key, + longKeySeries.currentHashCode, + nonNullCount); + + */ +public class + extends VectorGroupByHashKeyWordAggrTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + protected int wordAggregateColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + protected transient boolean isNullKeyAggregateNull; + + protected transient nullKeyAggregate; + +#IF DECIMAL64_SUM + protected transient long decimal64SumAbsMax; + + protected transient boolean isNullKeyDecimal64Overflow; + +#ENDIF DECIMAL64_SUM +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + wordAggregateColumnNum = wordAggregate.getWordAggregateColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + isNullKeyAggregateNull = true; + nullKeyAggregate = 0; // Assign some value. +#IF DECIMAL64_SUM + isNullKeyDecimal64Overflow = false; + + DecimalTypeInfo decimal64SumTypeInfo = (DecimalTypeInfo) wordAggregate.getOutputTypeInfo(); + decimal64SumAbsMax = + HiveDecimalWritable.getDecimal64AbsMax( + decimal64SumTypeInfo.getPrecision()); +#ENDIF DECIMAL64_SUM + } +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals +#COMMENT 4) Separate the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashWordAggrColumnCommonLines +#INCLUDE GroupByHashWordAggrColumnTableLines +#COMMENT +#COMMENT=========================================================================================== +#COMMENT + /* + * Repeating key case -- it is either ALL NULL keys or ALL same non-NULL keys. + */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) + throws HiveException, IOException { + + /* + * First, determine the aggregation of the non-key column for the whole batch which is covered + * by the repeating key. + */ + ColumnVector aggregateColVector = batch.cols[wordAggregateColumnNum]; + [] vector = (() aggregateColVector).vector; + boolean isAggregateNull = true; + aggregate = 0; +#IF DECIMAL64_SUM + boolean isDecimal64Overflow = false; +#ENDIF DECIMAL64_SUM + final int count = inputLogicalSize; + if (aggregateColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + if (aggregateColVector.noNulls || !aggregateColVector.isNull[0]) { + + isAggregateNull = false; + final repeatedValue = vector[0]; + +#USE_LINES WORD_AGGR_REPEATED_COLUMN_VALUE +4 + } + + } else if (aggregateColVector.noNulls) { + + // Non-Key: NOT REPEATING, NO NULLS. + if (batch.selectedInUse) { + + int[] selected = batch.selected; + + isAggregateNull = false; + aggregate = vector[selected[0]]; + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGR_COLUMN_VALUE +6 + } + } else { + + isAggregateNull = false; + aggregate = vector[0]; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGR_COLUMN_VALUE +6 + } + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + + boolean[] nonKeyIsNull = aggregateColVector.isNull; + if (batch.selectedInUse) { + + int[] selected = batch.selected; + + // Scan for first non-NULL column value...
+ int i = 0; + int batchIndex; + while (true) { + batchIndex = selected[i]; + if (!nonKeyIsNull[batchIndex]) { + break; + } + if (++i >= inputLogicalSize) { + break; + } + } + if (i < inputLogicalSize) { + isAggregateNull = false; + aggregate = vector[batchIndex]; + i++; + for (; i < inputLogicalSize; i++) { + batchIndex = selected[i]; + if (!nonKeyIsNull[batchIndex]) { + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGR_COLUMN_VALUE +10 + } + } + } + } else { + + // Scan for first non-NULL column value... + int batchIndex = 0; + while (true) { + if (!nonKeyIsNull[batchIndex]) { + break; + } + if (++batchIndex >= inputLogicalSize) { + break; + } + } + if (batchIndex < inputLogicalSize) { + isAggregateNull = false; + aggregate = vector[batchIndex++]; + for (; batchIndex < inputLogicalSize; batchIndex++) { + if (!nonKeyIsNull[batchIndex]) { + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGR_COLUMN_VALUE +10 + } + } + } + } + } + + /* + * Finally, use the non-key non-NULL aggregation for our repeated non-NULL or NULL keys. + */ + if (keyColVector.noNulls || !keyColVector.isNull[0]) { + + // Non-NULL key. +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + final int hashCode = HashCodeUtil.calculateLongHashCode(repeatingKey); + if (isAggregateNull) { + createOrIgnoreLongKeyNullEntry( + repeatingKey, + hashCode); + } else { + findOrCreateLongKeyWord( + repeatingKey, + hashCode, + aggregate); + if (currentIsAggregationNeeded) { + +#USE_LINES WORD_AGGR_FOR_FIND_OR_CREATE +6 + } + } +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + final int hashCode = + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength); + if (isAggregateNull) { + createOrIgnoreBytesKeyNullEntry( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + hashCode); + } else { + findOrCreateBytesKeyWord( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + hashCode, + aggregate); + if (currentIsAggregationNeeded) { + +#USE_LINES WORD_AGGR_FOR_FIND_OR_CREATE +6 + } + } +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + final int hashCode = + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength); + if (isAggregateNull) { + createOrIgnoreBytesKeyNullEntry( + repeatingKey, 0, repeatingKeyLength, + hashCode); + } else { + findOrCreateBytesKeyWord( + repeatingKey, 0, repeatingKeyLength, + hashCode, + aggregate); + if (currentIsAggregationNeeded) { + +#USE_LINES WORD_AGGR_FOR_FIND_OR_CREATE +6 + } + } +#ENDIF SINGLE_KEY + } else { + + // All NULL keys. Since we are aggregating a non-Key column, we must aggregate it under the + // NULL pseudo-entry. 
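The scan loops above reduce the whole batch's non-key column to one aggregate before touching the table, since a repeating key means a single hash entry absorbs the entire batch. For a nullable, non-repeating column that requires skipping to the first non-NULL value before accumulating; a self-contained sketch of the unselected SUM case (the boolean[] out-parameter is just a sketch device for returning the all-NULL flag):

final class RepeatingKeyColumnSum {
  // Reduce a nullable column to one SUM, skipping leading and interior NULLs.
  static long sumIgnoringNulls(long[] vector, boolean[] isNull, int size,
      boolean[] outIsAggregateNull) {
    int batchIndex = 0;
    while (batchIndex < size && isNull[batchIndex]) {
      batchIndex++;                    // scan for first non-NULL column value
    }
    if (batchIndex == size) {
      outIsAggregateNull[0] = true;    // every value was NULL
      return 0;
    }
    outIsAggregateNull[0] = false;
    long aggregate = vector[batchIndex++];
    for (; batchIndex < size; batchIndex++) {
      if (!isNull[batchIndex]) {
        aggregate += vector[batchIndex];
      }
    }
    return aggregate;
  }
}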
+ haveNullKey = true; + if (!isAggregateNull) { + + if (isNullKeyAggregateNull) { + isNullKeyAggregateNull = false; + nullKeyAggregate = aggregate; + } else { + +#USE_LINES WORD_AGGR_NULL_KEY +6 + } + } + + } + } + +#INCLUDE GroupByHashSingleKeyWordAggrColumnInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyWordAggrColumnInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and aggregate pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + keyColumnVector = () outputBatch.cols[0]; + + aggregateColumnVector = () outputBatch.cols[1]; + +#IF DECIMAL64_SUM + if (haveNullKey) { + outputAggregateForNullSingleKey( + keyColumnVector, aggregateColumnVector, + isNullKeyAggregateNull || isNullKeyDecimal64Overflow, nullKeyAggregate); + } + +#IF LONG_KEY + outputLongKeyAndDecimal64SumPairs( + keyColumnVector, aggregateColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndDecimal64SumPairs( + keyColumnVector, aggregateColumnVector); +#ENDIF STRING_KEY +#IF SINGLE_KEY + doOutputSingleKeyAndDecimal64SumPairs( + keyColumnVector, aggregateColumnVector); +#ENDIF SINGLE_KEY +#ELSE + if (haveNullKey) { + outputAggregateForNullSingleKey( + keyColumnVector, aggregateColumnVector, isNullKeyAggregateNull, nullKeyAggregate); + } + +#IF LONG_KEY + outputLongKeyAndAggregatePairs( + keyColumnVector, aggregateColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndAggregatePairs( + keyColumnVector, aggregateColumnVector); +#ENDIF STRING_KEY +#IF SINGLE_KEY + doOutputSingleKeyAndAggregatePairs( + keyColumnVector, aggregateColumnVector); +#ENDIF SINGLE_KEY +#ENDIF DECIMAL64_SUM + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordAggrColumnCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordAggrColumnCommonLines.txt new file mode 100644 index 0000000..abf7111 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordAggrColumnCommonLines.txt @@ -0,0 +1,166 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of 
calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to word column aggregations. +#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. +#COMMENT +#BEGIN_LINES WORD_AGGR_FOR_FIND_OR_CREATE + // aggregation. +#IF LONG_MIN + if (aggregate < currentLongWordAggr) { + replaceLongWordAggr(aggregate); + } +#ENDIF LONG_MIN +#IF LONG_MAX + if (aggregate > currentLongWordAggr) { + replaceLongWordAggr(aggregate); + } +#ENDIF LONG_MAX +#IF LONG_SUM + replaceLongWordAggr(currentLongWordAggr + aggregate); +#ENDIF LONG_SUM +#IF DECIMAL64_SUM + final long decimal64Sum = currentLongWordAggr + aggregate; + if (isDecimal64Overflow || Math.abs(decimal64Sum) > decimal64SumAbsMax) { + setLongWordAggrOverflow(); + } else { + replaceLongWordAggr(decimal64Sum); + } +#ENDIF DECIMAL64_SUM +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. +#COMMENT +#BEGIN_LINES WORD_AGGR_REPEATED_COLUMN_VALUE +#IF LONG_MIN + // LONG_MIN repeated aggregation is just the value. + aggregate = repeatedValue; +#ENDIF LONG_MIN +#IF LONG_MAX + // LONG_MAX repeated aggregation is just the value. + aggregate = repeatedValue; +#ENDIF LONG_MAX +#IF LONG_SUM + // LONG_SUM repeated aggregation calculation. + aggregate = repeatedValue * count; +#ENDIF LONG_SUM +#IF DECIMAL64_SUM + aggregate = repeatedValue * count; + if (Math.abs(aggregate) > decimal64SumAbsMax) { + isDecimal64Overflow = true; + } +#ENDIF DECIMAL64_SUM +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT . All variations. +#COMMENT +#BEGIN_LINES WORD_AGGR_COLUMN_VALUE + // aggregation. +#IF LONG_MIN + if (value < aggregate) { + aggregate = value; + } +#ENDIF LONG_MIN +#IF LONG_MAX + if (value > aggregate) { + aggregate = value; + } +#ENDIF LONG_MAX +#IF LONG_SUM + aggregate += value; +#ENDIF LONG_SUM +#IF DECIMAL64_SUM + aggregate += value; + if (Math.abs(aggregate) > decimal64SumAbsMax) { + isDecimal64Overflow = true; + } +#ENDIF DECIMAL64_SUM +#END_LINES +#COMMENT +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT . All variations. +#COMMENT +#BEGIN_LINES WORD_AGGR_NULLS_COLUMN_VALUE + if (isAggregateNull) { + isAggregateNull = false; + aggregate = value; + } else { + + // aggregation. 
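The DECIMAL64_SUM branches in these snippets guard every addition with Math.abs(sum) > decimal64SumAbsMax, because a decimal64 value is an unscaled long and a sum for result precision p must stay within 10^p - 1 in scaled units. A hedged, self-contained sketch of the guard (the real bound comes from HiveDecimalWritable.getDecimal64AbsMax; the constructor below just assumes precision <= 18 so the bound fits in a long):

public class Decimal64SumSketch {
  private final long decimal64SumAbsMax;   // e.g. 10^18 - 1 for precision 18
  private long sum;
  private boolean overflowed;

  Decimal64SumSketch(int precision) {
    long max = 1;
    for (int i = 0; i < precision; i++) {
      max *= 10;                           // assumes precision <= 18
    }
    decimal64SumAbsMax = max - 1;
  }

  void add(long value) {
    if (overflowed) {
      return;                              // sticky, like setLongWordAggrOverflow()
    }
    sum += value;
    if (Math.abs(sum) > decimal64SumAbsMax) {
      overflowed = true;                   // caller must not trust 'sum' anymore
    }
  }
}

The flag is sticky, mirroring setLongWordAggrOverflow(): once an entry overflows, later additions are ignored, and the output path folds the overflow flag into the NULL flag (see the isNullKeyAggregateNull || isNullKeyDecimal64Overflow argument passed to outputAggregateForNullSingleKey earlier).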
+#IF LONG_MIN + if (value < aggregate) { + aggregate = value; + } +#ENDIF LONG_MIN +#IF LONG_MAX + if (value > aggregate) { + aggregate = value; + } +#ENDIF LONG_MAX +#IF LONG_SUM + aggregate += value; +#ENDIF LONG_SUM +#IF DECIMAL64_SUM + aggregate += value; + if (Math.abs(aggregate) > decimal64SumAbsMax) { + isDecimal64Overflow = true; + } +#ENDIF DECIMAL64_SUM + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current NULL key ended -- +#COMMENT +#BEGIN_LINES WORD_AGGR_NULL_KEY_ENDED_ALL_NULLS + if (!haveNullKey) { + + // We now have a NULL key for NULL value. + haveNullKey = true; + isNullKeyAggregateNull = true; + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. +#COMMENT +#BEGIN_LINES WORD_AGGR_NULL_KEY + // aggregation against NULL key aggregate. +#IF LONG_MIN + if (aggregate < nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF LONG_MIN +#IF LONG_MAX + if (aggregate > nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF LONG_MAX +#IF LONG_SUM + nullKeyAggregate += aggregate; +#ENDIF LONG_SUM +#IF DECIMAL64_SUM + nullKeyAggregate += aggregate; + if (Math.abs(nullKeyAggregate) > decimal64SumAbsMax) { + isNullKeyDecimal64Overflow = true; + } +#ENDIF DECIMAL64_SUM +#END_LINES diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordAggrColumnTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordAggrColumnTableLines.txt new file mode 100644 index 0000000..fa9d4ae --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordAggrColumnTableLines.txt @@ -0,0 +1,243 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to non-COUNT non-key-column aggregations. +#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current NULL key ended -- +#COMMENT +#BEGIN_LINES WORD_NULL_KEY_ENDED_NULLS_AGGREGATION + if (!haveNullKey) { + + // We now have a NULL key. + haveNullKey = true; + isNullKeyAggregateNull = isAggregateNull; + nullKeyAggregate = aggregate; // Undefined when isAggregateNull true. + } else if (!isAggregateNull) { + + // We have something to work on. + + if (isNullKeyAggregateNull) { + + // First non-NULL aggregate for NULL key. + isNullKeyAggregateNull = false; + nullKeyAggregate = aggregate; + } else { + + // aggregation for current NULL key aggregate. 
+#IF LONG_MIN + if (aggregate < nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF LONG_MIN +#IF LONG_MAX + if (aggregate > nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF LONG_MAX +#IF LONG_SUM + nullKeyAggregate += aggregate; +#ENDIF LONG_SUM +#IF DECIMAL64_SUM + nullKeyAggregate += aggregate; + if (Math.abs(nullKeyAggregate) > decimal64SumAbsMax) { + isNullKeyDecimal64Overflow = true; + } +#ENDIF DECIMAL64_SUM + } + } +#END_LINES +#COMMENT +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current NULL key ended -- +#COMMENT +#BEGIN_LINES WORD_NULL_KEY_ENDED_NO_NULLS_AGGREGATION + if (!haveNullKey || isNullKeyAggregateNull) { + + // Initialize. + haveNullKey = true; + isNullKeyAggregateNull = false; + nullKeyAggregate = aggregate; + } else { + + // for current NULL key aggregate. +#IF LONG_MIN + if (aggregate < nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF LONG_MIN +#IF LONG_MAX + if (aggregate > nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF LONG_MAX +#IF LONG_SUM + nullKeyAggregate += aggregate; +#ENDIF LONG_SUM +#IF DECIMAL64_SUM + nullKeyAggregate += aggregate; + if (Math.abs(nullKeyAggregate) > decimal64SumAbsMax) { + isNullKeyDecimal64Overflow = true; + } +#ENDIF DECIMAL64_SUM + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. +#COMMENT +#BEGIN_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +#IF LONG_KEY + findOrCreateLongKeyWord( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + aggregate); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKeyWord( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + aggregate); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + findOrCreateBytesKeyWord( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + aggregate); +#ENDIF SINGLE_KEY||MULTI_KEY + if (currentIsAggregationNeeded) { + + // aggregation against key hash table entry. +#IF LONG_MIN + if (aggregate < currentLongWordAggr) { + replaceLongWordAggr(aggregate); + } +#ENDIF LONG_MIN +#IF LONG_MAX + if (aggregate > currentLongWordAggr) { + replaceLongWordAggr(aggregate); + } +#ENDIF LONG_MAX +#IF LONG_SUM + replaceLongWordAggr(currentLongWordAggr + aggregate); +#ENDIF LONG_SUM +#IF DECIMAL64_SUM + final long decimal64Sum = currentLongWordAggr + aggregate; + if (isDecimal64Overflow || Math.abs(decimal64Sum) > decimal64SumAbsMax) { + setLongWordAggrOverflow(); + } else { + replaceLongWordAggr(decimal64Sum); + } +#ENDIF DECIMAL64_SUM + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. 
+#COMMENT +#BEGIN_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION + if (isAggregateNull) { +#IF LONG_KEY + createOrIgnoreLongKeyNullEntry( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); +#ENDIF LONG_KEY +#IF STRING_KEY + createOrIgnoreBytesKeyNullEntry( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + createOrIgnoreBytesKeyNullEntry( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SINGLE_KEY||MULTI_KEY + } else { +#IF LONG_KEY + findOrCreateLongKeyWord( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + aggregate); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKeyWord( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + aggregate); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + findOrCreateBytesKeyWord( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + aggregate); +#ENDIF SINGLE_KEY||MULTI_KEY + if (currentIsAggregationNeeded) { + + // aggregation against key hash table entry. +#IF LONG_MIN + if (aggregate < currentLongWordAggr) { + replaceLongWordAggr(aggregate); + } +#ENDIF LONG_MIN +#IF LONG_MAX + if (aggregate > currentLongWordAggr) { + replaceLongWordAggr(aggregate); + } +#ENDIF LONG_MAX +#IF LONG_SUM + replaceLongWordAggr(currentLongWordAggr + aggregate); +#ENDIF LONG_SUM +#IF DECIMAL64_SUM + final long decimal64Sum = currentLongWordAggr + aggregate; + if (isDecimal64Overflow || Math.abs(decimal64Sum) > decimal64SumAbsMax) { + setLongWordAggrOverflow(); + } else { + replaceLongWordAggr(decimal64Sum); + } +#ENDIF DECIMAL64_SUM + } + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. 
+#COMMENT +#BEGIN_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +#IF LONG_KEY + createOrIgnoreLongKeyNullEntry( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); +#ENDIF LONG_KEY +#IF STRING_KEY + createOrIgnoreBytesKeyNullEntry( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + createOrIgnoreBytesKeyNullEntry( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 6ca1248..4080817 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -729,17 +729,22 @@ private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, return expr; } - public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List exprNodes) - throws HiveException { - VectorExpression[] vecExprs = - getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); + public static void upConvertDecimal64(VectorExpression[] vecExprs, VectorizationContext vContext) + throws HiveException { final int size = vecExprs.length; for (int i = 0; i < size; i++) { VectorExpression vecExpr = vecExprs[i]; if (vecExpr.getOutputColumnVectorType() == ColumnVector.Type.DECIMAL_64) { - vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr); + vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr, vContext); } } + } + + public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List exprNodes) + throws HiveException { + VectorExpression[] vecExprs = + getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); + upConvertDecimal64(vecExprs, this); return vecExprs; } @@ -1677,9 +1682,11 @@ private VectorExpression createDecimal64VectorExpression(Class vectorClass, * The instantiateExpression method sets the output column and type information. 
*/ VectorExpression vectorExpression = - instantiateExpression(vectorClass, returnTypeInfo, returnDataTypePhysicalVariation, arguments); + instantiateExpression( + vectorClass, returnTypeInfo, DataTypePhysicalVariation.DECIMAL_64, this, + arguments); if (vectorExpression == null) { - handleCouldNotInstantiateVectorExpression(vectorClass, returnTypeInfo, returnDataTypePhysicalVariation, arguments); + handleCouldNotInstantiateVectorExpression(vectorClass, returnTypeInfo, DataTypePhysicalVariation.DECIMAL_64, arguments); } vectorExpression.setInputTypeInfos(typeInfos); @@ -1785,8 +1792,9 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, return createVectorExpression(vclass, childExpr, childrenMode, returnType); } - private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeInfo resultTypeInfo) - throws HiveException { + private static VectorExpression createDecimal64ToDecimalConversion(int colIndex, + TypeInfo resultTypeInfo, VectorizationContext vContext) + throws HiveException { Object [] conversionArgs = new Object[1]; conversionArgs[0] = colIndex; VectorExpression vectorExpression = @@ -1794,6 +1802,7 @@ private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeIn ConvertDecimal64ToDecimal.class, resultTypeInfo, DataTypePhysicalVariation.NONE, + vContext, conversionArgs); if (vectorExpression == null) { handleCouldNotInstantiateVectorExpression( @@ -1820,17 +1829,18 @@ public void wrapWithDecimal64ToDecimalConversions(VectorExpression[] vecExprs) vecExpr.getOutputDataTypePhysicalVariation(); if (outputDataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { vecExprs[i] = - wrapWithDecimal64ToDecimalConversion(vecExpr); + wrapWithDecimal64ToDecimalConversion(vecExpr, this); } } } } - public VectorExpression wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression) + public static VectorExpression wrapWithDecimal64ToDecimalConversion( + VectorExpression inputExpression, VectorizationContext vContext) throws HiveException { VectorExpression wrapExpression = createDecimal64ToDecimalConversion( - inputExpression.getOutputColumnNum(), inputExpression.getOutputTypeInfo()); + inputExpression.getOutputColumnNum(), inputExpression.getOutputTypeInfo(), vContext); if (inputExpression instanceof IdentityExpression) { return wrapExpression; } @@ -1876,11 +1886,14 @@ private VectorExpression createVectorExpression(Class vectorClass, // In this method, we must only process non-Decimal64 column vectors. // Convert Decimal64 columns to regular decimal. - DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex); - if (dataTypePhysicalVariation != null && dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { + DataTypePhysicalVariation dataTypePhysicalVariation = + getDataTypePhysicalVariation(colIndex); + if (dataTypePhysicalVariation != null && + dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { // FUTURE: Can we reuse this conversion? 
- VectorExpression vChild = createDecimal64ToDecimalConversion(colIndex, childTypeInfo); + VectorExpression vChild = + createDecimal64ToDecimalConversion(colIndex, childTypeInfo, this); children.add(vChild); arguments[i] = vChild.getOutputColumnNum(); @@ -1909,7 +1922,10 @@ private VectorExpression createVectorExpression(Class vectorClass, throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); } } - VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments); + VectorExpression vectorExpression = + instantiateExpression( + vectorClass, returnType, DataTypePhysicalVariation.NONE, this, + arguments); if (vectorExpression == null) { handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments); } @@ -1928,7 +1944,7 @@ private VectorExpression createVectorExpression(Class vectorClass, return vectorExpression; } - private void handleCouldNotInstantiateVectorExpression(Class vectorClass, TypeInfo returnType, + private static void handleCouldNotInstantiateVectorExpression(Class vectorClass, TypeInfo returnType, DataTypePhysicalVariation dataTypePhysicalVariation, Object[] arguments) throws HiveException { String displayString = "Could not instantiate vector expression class " + vectorClass.getName() + " for arguments " + Arrays.toString(arguments) + " return type " + @@ -1943,7 +1959,7 @@ private void handleCouldNotInstantiateVectorExpression(Class vectorClass, Typ return VectorExpressionDescriptor.Mode.PROJECTION; } - private String getNewInstanceArgumentString(Object [] args) { + private static String getNewInstanceArgumentString(Object [] args) { if (args == null) { return "arguments: NULL"; } @@ -1983,8 +1999,9 @@ public static String getStackTraceAsSingleLine(Throwable e) { return cleaned; } - public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTypeInfo, - DataTypePhysicalVariation returnDataTypePhysicalVariation, Object...args) + public static VectorExpression instantiateExpression(Class vclass, TypeInfo returnTypeInfo, + DataTypePhysicalVariation returnDataTypePhysicalVariation, VectorizationContext vContext, + Object...args) throws HiveException { VectorExpression ve = null; Constructor ctor = getConstructor(vclass); @@ -1994,15 +2011,19 @@ public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTy try { ve = (VectorExpression) ctor.newInstance(); } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + - getStackTraceAsSingleLine(ex)); + throw new HiveException( + "Could not instantiate " + vclass.getSimpleName() + + " with 0 arguments" + + ", exception: " + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength) { try { ve = (VectorExpression) ctor.newInstance(args); } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + - getStackTraceAsSingleLine(ex)); + throw new HiveException( + "Could not instantiate " + vclass.getSimpleName() + + " with " + getNewInstanceArgumentString(args) + + ", exception: " + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength + 1) { // Additional argument is needed, which is the outputcolumn. 
@@ -2017,7 +2038,7 @@ public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTy // Special handling for decimal because decimal types need scale and precision parameter. // This special handling should be avoided by using returnType uniformly for all cases. final int outputColumnNum = - ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation); + vContext.ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation); newArgs = Arrays.copyOf(args, numParams); newArgs[numParams-1] = outputColumnNum; @@ -2031,8 +2052,10 @@ public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTy ve.setOutputDataTypePhysicalVariation(returnDataTypePhysicalVariation); } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + - getStackTraceAsSingleLine(ex)); + throw new HiveException( + "Could not instantiate " + vclass.getSimpleName() + + " with arguments " + getNewInstanceArgumentString(newArgs) + + ", exception: " + getStackTraceAsSingleLine(ex)); } } // Add maxLength parameter to UDFs that have CHAR or VARCHAR output. @@ -2885,7 +2908,7 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { // Do Decimal64 conversion instead. - return createDecimal64ToDecimalConversion(colIndex, returnType); + return createDecimal64ToDecimalConversion(colIndex, returnType, this); } else { return createVectorExpression(CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); @@ -3889,7 +3912,7 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio return ts; } - private Constructor getConstructor(Class cl) throws HiveException { + private static Constructor getConstructor(Class cl) throws HiveException { try { Constructor [] ctors = cl.getDeclaredConstructors(); if (ctors.length == 1) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java new file mode 100644 index 0000000..830ef0d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.WordAggregate; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; + +/** + * This class is common operator class of Native Vector GROUP BY that has common + * initialization logic. + */ +public abstract class VectorGroupByCommon + extends Operator + implements VectorizationContextRegion, VectorizationOperator { + + private static final long serialVersionUID = 1L; + + /* + * Standard boilerplate code for vectorized operators. + */ + protected VectorGroupByDesc vectorDesc; + + protected VectorizationContext vContext; + + // Create a new outgoing vectorization context because column name map will change. + protected VectorizationContext vOutContext; + + /* + * Native Vector GROUP BY specific configuration members. + */ + protected VectorGroupByInfo vectorGroupByInfo; + + protected VectorExpression[] groupByKeyExpressions; + + protected VectorAggregationDesc[] vectorAggregationDescs; + + protected AggregationVariation aggregationVariation; + + // Extra information for single COUNT and single word-aggr queries. + protected CountAggregate countAggregate; + protected WordAggregate wordAggregate; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + // For debug tracing: the name of the map or reduce task. + protected transient String taskName; + + // Debug display. + protected transient long batchCounter; + + public VectorGroupByCommon() { + super(); + } + + public VectorGroupByCommon(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx); + + /* + * Standard boilerplate code for vectorized operators. + */ + GroupByDesc desc = (GroupByDesc) conf; + this.conf = desc; + this.vectorDesc = (VectorGroupByDesc) vectorDesc; + vectorGroupByInfo = this.vectorDesc.getVectorGroupByInfo(); + + this.vContext = vContext; + + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); + + /* + * Native Vector GROUP BY specific configuration information. 
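+   * (The assignments below pull plan-time values -- the key expressions, the aggregation
+   * descriptors, and the aggregation variation -- out of the vector GROUP BY descriptor.)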
+ */ + groupByKeyExpressions = this.vectorDesc.getKeyExpressions(); + + vectorAggregationDescs = this.vectorDesc.getVecAggrDescs(); + + aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + + countAggregate = vectorGroupByInfo.getCountAggregation(); + wordAggregate = vectorGroupByInfo.getWordAggregation(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + // Determine the name of our map or reduce task for debug tracing. + BaseWork work = Utilities.getMapWork(hconf); + if (work == null) { + work = Utilities.getReduceWork(hconf); + } + if (work == null) { + taskName = "none"; + } else { + taskName = work.getName(); + } + + batchCounter = 0; + } + + /** + * Implements the getName function for the Node Interface. + * + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + public static String getOperatorName() { + return "GBY"; + } + + @Override + public VectorizationContext getOutputVectorizationContext() { + return vOutContext; + } + + @Override + public VectorizationContext getInputVectorizationContext() { + return vContext; + } + + @Override + public VectorDesc getVectorDesc() { + return vectorDesc; + } + + @Override + public OperatorType getType() { + return OperatorType.GROUPBY; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java new file mode 100644 index 0000000..e113eb8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+/**
+ * This class is the common operator class of Native Vector GROUP BY for output generation:
+ * it takes the completed keys and aggregations and fills up the output batch.
+ */
+public abstract class VectorGroupByCommonOutput
+    extends VectorGroupByCommon {
+
+  private static final long serialVersionUID = 1L;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient VectorizedRowBatch outputBatch;
+
+  private transient VectorizedRowBatchCtx vectorizedRowBatchCtx;
+
+  private transient TypeInfo[] outputTypeInfos;
+  private transient DataTypePhysicalVariation[] outputDataTypePhysicalVariations;
+
+  private transient StandardStructObjectInspector standardOutputObjInspector;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByCommonOutput() {
+    super();
+  }
+
+  public VectorGroupByCommonOutput(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();
+
+    List<String> outputFieldNames = conf.getOutputColumnNames();
+
+    final int keyCount = (groupByKeyExpressions == null ? 0 : groupByKeyExpressions.length);
+    final int aggrCount = (vectorAggregationDescs == null ?
0 : vectorAggregationDescs.length); + final int outputCount = keyCount + aggrCount; + outputTypeInfos = new TypeInfo[outputCount]; + outputDataTypePhysicalVariations = new DataTypePhysicalVariation[outputCount]; + int outputTypesIndex = 0; + + for(int i = 0; i < keyCount; ++i) { + VectorExpression keyExpression = groupByKeyExpressions[i]; + TypeInfo outputTypeInfo = keyExpression.getOutputTypeInfo(); + outputTypeInfos[outputTypesIndex] = outputTypeInfo; + DataTypePhysicalVariation outputDataTypePhysicalVariation = + keyExpression.getOutputDataTypePhysicalVariation(); + outputDataTypePhysicalVariations[outputTypesIndex++] = outputDataTypePhysicalVariation; + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + outputTypeInfo); + objectInspectors.add(objInsp); + } + + for(int i = 0; i < aggrCount; ++i) { + VectorAggregationDesc vecAggrDesc = vectorAggregationDescs[i]; + TypeInfo outputTypeInfo = vecAggrDesc.getOutputTypeInfo(); + outputTypeInfos[outputTypesIndex] = outputTypeInfo; + outputDataTypePhysicalVariations[outputTypesIndex++] = + vecAggrDesc.getOutputDataTypePhysicalVariation(); + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo); + objectInspectors.add(objInsp); + } + + standardOutputObjInspector = + ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors); + outputObjInspector = standardOutputObjInspector; + + /* + * Setup the GROUP BY output batch and vectorization context for downstream vector operators. + */ + vectorizedRowBatchCtx = + new VectorizedRowBatchCtx( + conf.getOutputColumnNames().toArray(new String[0]), + outputTypeInfos, + outputDataTypePhysicalVariations, + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + vOutContext.getScratchColumnTypeNames(), + vOutContext.getScratchDataTypePhysicalVariations()); + + outputBatch = vectorizedRowBatchCtx.createVectorizedRowBatch(); + } + + public void forwardOutputBatch(VectorizedRowBatch outputBatch) throws HiveException { + + forward(outputBatch, null); + + outputBatch.reset(); + } + + /** + * Copy all of the keys and aggregations to the output batch. + */ + protected abstract void outputGroupBy() throws HiveException; + + protected void flushGroupBy() throws HiveException { + outputGroupBy(); + if (outputBatch.size > 0) { + forwardOutputBatch(outputBatch); + } + } + + /** + * On close, make sure a partially filled overflow batch gets forwarded. + */ + @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + if (!aborted) { + flushGroupBy(); + } + LOG.debug("VectorGroupByCommonOutputOperator closeOp " + batchCounter + " batches processed"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java new file mode 100644 index 0000000..87a2e2b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.VectorGroupByCommonOutput;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class is the common hash operator class of Native Vector GROUP BY, holding the
+ * hash-related initialization logic.
+ */
+public abstract class VectorGroupByHashCommon
+    extends VectorGroupByCommonOutput {
+
+  private static final long serialVersionUID = 1L;
+  private static final String CLASS_NAME = VectorGroupByHashCommon.class.getName();
+  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  // Non-transient members initialized by the constructor.  They cannot be final due to Kryo.
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient long hashGroupByMemoryAvailableByteLength;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashCommon() {
+    super();
+  }
+
+  public VectorGroupByHashCommon(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    final float memoryPercentage = conf.getGroupByMemoryUsage();
+    final int testMaxMemoryAvailable = vectorGroupByInfo.getTestGroupByMaxMemoryAvailable();
+    final long maxMemoryAvailable =
+        (testMaxMemoryAvailable == -1 ?
+            conf.getMaxMemoryAvailable() : testMaxMemoryAvailable);
+    hashGroupByMemoryAvailableByteLength = (long) (memoryPercentage * maxMemoryAvailable);
+  }
+
+  /*
+   * Return the exponent of the power of 2 that is equal to or next below a value.
+   *
+   * Example:
+   *   100000b = 2^5 = 32,
+   *   where Long.numberOfLeadingZeros returns (64 - 6) = 58
+   *   and the result = 5.
+   *
+   * Replacing any set of lower 0's with 1's doesn't change the result.
+   * Or, numbers 32 to 63 return 5.
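+   *
+   * More examples of the same formula (result = 64 - numberOfLeadingZeros(a) - 1):
+   *   floorPowerOf2(1) = 0, floorPowerOf2(31) = 4, floorPowerOf2(64) = 6.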
+ * + */ + public static int floorPowerOf2(long a) { + if (a == 0) { + return 0; + } + final int floorLeadingZerosCount = Long.numberOfLeadingZeros(a); + final int result = Long.SIZE - floorLeadingZerosCount - 1; + return result; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java new file mode 100644 index 0000000..a3ee7ed --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java @@ -0,0 +1,253 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * This class is common hash operator class of Native Vector GROUP BY with common operator + * logic for checking key limits and the common process method logic. + */ +public abstract class VectorGroupByHashOperatorBase + extends VectorGroupByHashTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByHashOperatorBase() { + super(); + } + + public VectorGroupByHashOperatorBase(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + determineInitialHashTableSize(); + + allocateHashTable(); + } + + /* + * Create a VectorDeserializeRow object for deserializing a multi-column key. (Or, a single + * column of a non-optimized data type). + */ + protected VectorDeserializeRow initMultiKeyDeserialize() + throws HiveException { + + final int size = groupByKeyExpressions.length; + TypeInfo[] typeInfos = new TypeInfo[size]; + DataTypePhysicalVariation[] dataTypePhysicalVariations = + new DataTypePhysicalVariation[size]; + for (int i = 0; i < size; i++) { + VectorExpression keyExpr = groupByKeyExpressions[i]; + typeInfos[i] = keyExpr.getOutputTypeInfo(); + dataTypePhysicalVariations[i] = keyExpr.getOutputDataTypePhysicalVariation(); + } + + VectorDeserializeRow keyVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + typeInfos, + dataTypePhysicalVariations, + /* useExternalBuffer */ true)); + // GROUP BY multi-key is starting at output column 0. + keyVectorDeserializeRow.init(0); + + return keyVectorDeserializeRow; + } + + protected void doBeforeMainLoopWork(final int inputLogicalSize) + throws HiveException, IOException { + + /* + * If the hash table has less than the worst-case inputLogicalSize keys that + * could be added, then flush the current hash table entries and clear it. + */ + checkKeyLimitOncePerBatch(inputLogicalSize); + } + + protected abstract void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException; + + /* + * Common process method that does common work then drives the specialized Operator classes with + * the doBeforeMainLoopWork and doMainLoop overrides. + */ + @Override + public void process(Object row, int tag) throws HiveException { + + try { + VectorizedRowBatch batch = (VectorizedRowBatch) row; + + batchCounter++; + + final int inputLogicalSize = batch.size; + + if (inputLogicalSize == 0) { + return; + } + + /* + * Perform any key expressions. Results will go into scratch columns. + */ + if (groupByKeyExpressions != null) { + for (VectorExpression ve : groupByKeyExpressions) { + ve.evaluate(batch); + } + } + + doBeforeMainLoopWork(inputLogicalSize); + + doMainLoop(batch, inputLogicalSize); + + } catch (Exception e) { + throw new HiveException(e); + } + } + + /* + * Add a GROUP BY output row to the output batch for a NULL 1 column key and single COUNT. + */ + protected void outputCountForNullSingleKey(ColumnVector keyColumnVector, + LongColumnVector countColumnVector, long nullKeyCount) + throws HiveException { + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + + countColumnVector.isNull[nullBatchIndex] = false; + countColumnVector.vector[nullBatchIndex] = nullKeyCount; + + outputBatch.size++; + } + + /* + * Add a GROUP BY output row to the output batch for a NULL 1 column key and a + * single LONG word-aggr. 
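+   * (For example, a query shaped like SELECT col, MAX(v) FROM t GROUP BY col would produce
+   * one such row for the NULL key group -- an illustrative query, not one from this patch.)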
+ */ + protected void outputAggregateForNullSingleKey(ColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector, boolean isNullKeyAggregateNull, + long nullKeyAggregate) + throws HiveException { + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + + aggregateColumnVector.isNull[nullBatchIndex] = isNullKeyAggregateNull; + aggregateColumnVector.vector[nullBatchIndex] = nullKeyAggregate; + + outputBatch.size++; + } + + /* + * Add a GROUP BY output row to the output batch for a NULL N column key and single COUNT. + */ + protected void outputCountForNullMultiKey(long nullKeyCount) + throws HiveException { + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int keySize = groupByKeyExpressions.length; + final int nullBatchIndex = outputBatch.size; + for (int i = 0; i < keySize; i++) { + ColumnVector keyColumnVector = outputBatch.cols[i]; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + } + + LongColumnVector countKeyColumnVector = (LongColumnVector) outputBatch.cols[keySize]; + countKeyColumnVector.isNull[nullBatchIndex] = false; + countKeyColumnVector.vector[nullBatchIndex] = nullKeyCount; + + outputBatch.size++; + } + + /* + * Add a GROUP BY output row to the output batch for a NULL N column key and a + * single LONG word-aggr. + */ + protected void outputAggregateForNullMultiKey(boolean isNullKeyAggregateNull, + long nullKeyAggregate) + throws HiveException { + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int keySize = groupByKeyExpressions.length; + final int nullBatchIndex = outputBatch.size; + for (int i = 0; i < keySize; i++) { + ColumnVector keyColumnVector = outputBatch.cols[i]; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + } + + LongColumnVector aggregateColumnVector = (LongColumnVector) outputBatch.cols[keySize]; + aggregateColumnVector.isNull[nullBatchIndex] = isNullKeyAggregateNull; + aggregateColumnVector.vector[nullBatchIndex] = nullKeyAggregate; + + outputBatch.size++; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java new file mode 100644 index 0000000..c32e18f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java @@ -0,0 +1,350 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is common hash table class of Native Vector GROUP BY. + */ +public abstract class VectorGroupByHashTable + extends VectorGroupByHashCommon { + + private static final long serialVersionUID = 1L; + + private static final String CLASS_NAME = VectorGroupByHashTable.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + private boolean isBytesHashTable; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + // How many times we encountered a limit on the hash table and had to flush and recreate. + private long flushAndRecreateCount; + + // Memory available in bytes for the slot table, and when we have bytes keys, the memory available + // for the key store. + protected transient long hashTableMemoryAvailableByteLength; + protected transient long keyStoreMemoryAvailableByteLength; + + // The logical size and power of 2 mask of the hash table + protected transient int logicalHashBucketCount; + protected transient int logicalHashBucketMask; + + // The number of longs in the hash table slot array. It is the logical size * entries per slot. + protected int slotPhysicalArraySize; + + // The maximum number of keys we'll keep in the hash table before flushing. + protected transient int hashTableKeyCountLimit; + + // The slot table with 1, 2, 3, etc longs per entry. + protected transient long[] slotMultiples; + + // The key count and largest number of misses in our quadratic probing style hash table. + // Maintained by the hash table variations. + protected transient int keyCount; + protected transient int largestNumberOfSteps; + + // Byte length for WriteBuffers segments in the VectorKeyStore used for bytes keys + protected transient int keyStoreByteSize; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+  //
+
+  public VectorGroupByHashTable() {
+    super();
+  }
+
+  public VectorGroupByHashTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    isBytesHashTable =
+        (this.vectorDesc.getVectorGroupByInfo().getHashTableKeyType() != HashTableKeyType.LONG);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    flushAndRecreateCount = 0;
+
+    divvyUpHashGroupByMemory();
+  }
+
+  public long getFlushAndStartOverCount() {
+    return flushAndRecreateCount;
+  }
+
+  public abstract int getHashTableMultiple();
+
+  /*
+   * Decide how to apportion memory between the slot table and, when we have bytes keys,
+   * the key store.  (Single long keys are stored in the slot table itself).
+   */
+  private void divvyUpHashGroupByMemory() {
+
+    /*
+     * CONCERN:
+     * Do we really want a hash table to use the maximum supplied memory immediately?
+     * That could waste memory that other operators could use, and cause Java GC
+     * issues because of how large the single slot table array is.  Large hash tables
+     * with small key sets could cause lots of unnecessary cold RAM hits.  There is a tension
+     * here, of course: too small a table and there will be more insert collisions.
+     *
+     * In contrast, the current VectorGroupByOperator and GroupByOperator classes use a
+     * Java HashMap which automatically grows over time.
+     *
+     * The issues here are similar to MapJoin, except we have the possibility of using a smaller
+     * hash table and flushing everything to Reduce, then creating a larger slot table instead
+     * of zeroing the current one.  MapJoin cannot flush -- it either needs to expand its
+     * hash tables to hold everything or spill some of the data to secondary storage (Hybrid Grace).
+     */
+
+    if (isBytesHashTable) {
+
+      // UNDONE: Use key size estimates to make a better decision than half...
+      final long half = hashGroupByMemoryAvailableByteLength / 2;
+      hashTableMemoryAvailableByteLength = half;
+      keyStoreMemoryAvailableByteLength = half;
+    } else {
+      hashTableMemoryAvailableByteLength = hashGroupByMemoryAvailableByteLength;
+      keyStoreMemoryAvailableByteLength = 0;
+    }
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  private static final int LARGEST_NUMBER_OF_STEPS_THRESHOLD = 6;
+
+  public boolean isAboveLargestNumberOfStepsThresold() {
+    return (largestNumberOfSteps > LARGEST_NUMBER_OF_STEPS_THRESHOLD);
+  }
+
+  /*
+   * Override this method in specialized hash tables that have more to initialize and/or create.
+   */
+  public void allocateHashTable() throws HiveException {
+    allocateBucketArray();
+  }
+
+  /*
+   * Allocate the key store when we have bytes keys.
+   */
+  public VectorKeyStore allocateVectorKeyStore(VectorKeyStore keyStore) {
+    if (keyStore == null) {
+      return new VectorKeyStore(keyStoreByteSize);
+    } else {
+      keyStore.clear();
+      return keyStore;
+    }
+  }
+
+  /*
+   * When flushing and recreating, release the memory when the slot table is changing size, etc.
+   */
+  public void releaseHashTableMemory() throws HiveException {
+    if (slotMultiples.length == slotPhysicalArraySize) {
+
+      // Keep it and clear it later.
+      return;
+    }
+    slotMultiples = null;
+  }
+
+  // Since the maximum int is 2^31 - 1, a full 2^31 cannot be used for array sizing; the
+  // largest usable power of 2 for int indexing is one less than the number of int value bits.
+  // 2^30 = 1,073,741,824.
+  private static final int MAX_POWER_OF_2_FOR_INT_INDEXING = Integer.SIZE - 2;
+
+  // An arbitrary factor to divide the slot table size by to get the key count limit.
+  // Hitting the key count limit will cause the hash table to be flushed to Reduce and cleared
+  // for refilling.
+  private static final int KEY_COUNT_FACTOR = 8;
+
+  // Make sure we have comfortable room for at least one batch of new keys to support the
+  // VectorGroupByHashOperatorBase.checkKeyLimitOncePerBatch method.
+  private static final int MIN_HASH_TABLE_BYTE_LENGTH =
+      VectorizedRowBatch.DEFAULT_SIZE * KEY_COUNT_FACTOR * (Long.SIZE / Byte.SIZE);
+  private static final int MIN_POWER_OF_2 = floorPowerOf2(MIN_HASH_TABLE_BYTE_LENGTH);
+
+  /*
+   * Determine the size for the slot table and, for bytes keys, the key store.
+   */
+  public void determineInitialHashTableSize() throws HiveException {
+
+    /*
+     * Slot table size.
+     */
+
+    final int multiple = getHashTableMultiple();
+
+    // Take into account our multiple.
+    final int floorPowerOf2MaxHashTableMemoryByteLength =
+        floorPowerOf2(hashTableMemoryAvailableByteLength / multiple);
+
+    // No matter how much memory they want to give us, our array is limited to int indexing.
+    int maxPowerOf2HashTableMemoryByteLength =
+        Math.min(floorPowerOf2MaxHashTableMemoryByteLength, MAX_POWER_OF_2_FOR_INT_INDEXING);
+
+    // UNDONE: Artificially limit for now... 2^24 = 16,777,216 bytes.
+    maxPowerOf2HashTableMemoryByteLength = Math.min(maxPowerOf2HashTableMemoryByteLength, 24);
+
+    final int powerOf2HashTableMemoryByteLength =
+        Math.max(maxPowerOf2HashTableMemoryByteLength, MIN_POWER_OF_2);
+
+    final int hashTableByteSize = (1 << powerOf2HashTableMemoryByteLength);
+    final int hashTableLongSize = hashTableByteSize / (Long.SIZE / Byte.SIZE);
+
+    logicalHashBucketCount = hashTableLongSize;
+
+    slotPhysicalArraySize = logicalHashBucketCount * multiple;
+
+    /*
+     * Key store size.
+     */
+
+    if (isBytesHashTable) {
+      final int floorPowerOf2MaxKeyStoreMemoryByteLength =
+          floorPowerOf2(keyStoreMemoryAvailableByteLength);
+
+      // No matter how much memory they want to give us, our array is limited to int indexing.
+      int maxPowerOf2KeyStoreMemoryByteLength =
+          Math.min(floorPowerOf2MaxKeyStoreMemoryByteLength, MAX_POWER_OF_2_FOR_INT_INDEXING);
+
+      keyStoreByteSize = (1 << maxPowerOf2KeyStoreMemoryByteLength);
+
+      // CONSIDER: Better min/max limits.
+      keyStoreByteSize = Math.min(keyStoreByteSize, 1024 * 1024);
+      keyStoreByteSize = Math.max(keyStoreByteSize, 128 * 1024);
+    }
+
+    if (!isBytesHashTable) {
+      LOG.info(
+          "Logical slot table size " + logicalHashBucketCount +
+          " multiple " + multiple);
+    } else {
+      LOG.info(
+          "Logical slot table size " + logicalHashBucketCount +
+          " multiple " + multiple +
+          " key store size " + keyStoreByteSize);
+    }
+  }
+
+  /*
+   * When flushing and recreating, determine the slot table size to use for the next allocation.
+   */
+  public void determineNextHashTableSize() throws HiveException {
+    // CONSIDER: Growing the hash table size upon examining current hash table.
+  }
+
+  /*
+   * For now, we are just allocating the slot table array.
+   * FUTURE: We'll need to revisit these calculations when we support STRING keys.
+   */
+  protected void allocateBucketArray() {
+    if (slotMultiples != null) {
+
+      // The releaseHashTableMemory method kept the same size array, so just clear it.
+      Arrays.fill(slotMultiples, 0);
+    } else {
+
+      logicalHashBucketMask = logicalHashBucketCount - 1;
+
+      hashTableKeyCountLimit = logicalHashBucketCount / KEY_COUNT_FACTOR;
+
+      slotMultiples = new long[slotPhysicalArraySize];
+    }
+
+    keyCount = 0;
+    largestNumberOfSteps = 0;
+
+    if (flushAndRecreateCount != 0) {
+      LOG.info("Flush and recreate #" + flushAndRecreateCount);
+    }
+  }
+
+  /*
+   * Check the worst case possibility -- adding a new key for each row in the batch -- and, when
+   * the key limit would be exceeded, flush and recreate the hash table.
+   */
+  protected void checkKeyLimitOncePerBatch(final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    /*
+     * Check the hash table key limit against the worst case of adding all of the batch's keys.
+     * Doing this once per batch, outside the inner loop, is better for performance.
+     */
+    final boolean isReachedKeyLimit =
+        (keyCount + inputLogicalSize > hashTableKeyCountLimit);
+    if (isReachedKeyLimit || isAboveLargestNumberOfStepsThresold()) {
+      LOG.info(
+          "Reached key limit " + isReachedKeyLimit +
+          ", above largest number of steps threshold " + isAboveLargestNumberOfStepsThresold());
+
+      flushAndRecreateCount++;
+      flushAndRecreate();
+      if (keyCount + inputLogicalSize > hashTableKeyCountLimit) {
+
+        // Hash table is way too small.
+        raise2ndHitOutOfStorage();
+      }
+    }
+  }
+
+  protected void raise2ndHitOutOfStorage() throws HiveException {
+    throw new HiveException(
+        "After flushing and clearing the hash table, there still isn't enough storage?");
+  }
+
+  protected void flushAndRecreate() throws HiveException, IOException {
+
+    /*
+     * 1) Flush hash table.
+     * 2) Use current state to determine next sizes.
+     * 3) Release memory, if necessary.
+     * 4) Recreate/clear using next sizes.
+     */
+
+    flushGroupBy();
+
+    // Based on current hash table sizes and perhaps historical information, determine
+    // the size to use next during recreation.
+    determineNextHashTableSize();
+
+    releaseHashTableMemory();
+
+    allocateHashTable();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/VectorGroupByHashBytesKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/VectorGroupByHashBytesKeyCountTable.java
new file mode 100644
index 0000000..00cd242
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/VectorGroupByHashBytesKeyCountTable.java
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/** + * A single bytes key hash table optimized for a single count Native Vector GROUP BY. + */ +public abstract class VectorGroupByHashBytesKeyCountTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorKeyStore keyStore; + private transient WriteBuffers writeBuffers; + protected WriteBuffers.Position readPos; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashBytesKeyCountTable() { + super(); + } + + public VectorGroupByHashBytesKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + writeBuffers = keyStore.getWriteBuffers(); + readPos = new WriteBuffers.Position(); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + writeBuffers = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_ENTRY_SIZE; + } + + protected static final int BYTES_ENTRY_SIZE = 2; + + public void findOrCreateBytesKeyCount(byte[] keyBytes, int keyStart, int keyLength, + long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int pairIndex; + boolean isNewKey; + long refWord; + final long partialHashCode = + VectorHashKeyRef.extractPartialHashCode(hashCode); + while (true) { + pairIndex = 2 * slot; + refWord = slotMultiples[pairIndex]; + if (refWord == 0) { + isNewKey = true; + break; + } + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + partialHashCode && + VectorHashKeyRef.equalKey( + refWord, keyBytes, keyStart, keyLength, writeBuffers, readPos)) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + + // First entry. 
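+      // Each slot is a pair of longs: the first holds the key reference word (the partial
+      // hash code plus a reference into the key store) and the second holds the running count.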
+ slotMultiples[pairIndex] = keyStore.add(partialHashCode, keyBytes, keyStart, keyLength); + slotMultiples[pairIndex + 1] = count; + + keyCount++; + + } else if (count > 0) { + + slotMultiples[pairIndex + 1] += count; + } + } + + private int countKeyPairIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + private long currentCountKeyCount; + + protected int initBytesKeyIterator() { + countKeyPairIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + currentCountKeyCount = 0; + return keyCount; + } + + // Read next key. + protected void readNext() { + while (true) { + final long keyRef = slotMultiples[countKeyPairIndex]; + if (keyRef != 0) { + keyStore.getKey( + keyRef, + keyByteSegmentRef, + keyReadPos); + currentCountKeyCount = slotMultiples[countKeyPairIndex + 1]; + + countKeyPairIndex += 2; + return; + } + countKeyPairIndex += 2; + } + } + + public byte[] getKeyBytes() { + return keyByteSegmentRef.getBytes(); + } + + public int getKeyBytesOffset() { + return (int) keyByteSegmentRef.getOffset(); + } + + public int getKeyBytesLength() { + return keyByteSegmentRef.getLength(); + } + + public long getCount() { + return currentCountKeyCount; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/VectorGroupByHashBytesKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/VectorGroupByHashBytesKeyDuplicateReductionTable.java new file mode 100644 index 0000000..a46ee28 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/VectorGroupByHashBytesKeyDuplicateReductionTable.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/* + * A single bytes key hash table optimized for duplicate reduction Native Vector GROUP BY. 
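+ * Only key existence is tracked -- one ref word per slot and no aggregate word -- which is
+ * why getHashTableMultiple() returns an entry size of 1 rather than the 2 used by the count
+ * and word-aggr tables.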
+ */ +public abstract class VectorGroupByHashBytesKeyDuplicateReductionTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorKeyStore keyStore; + private transient WriteBuffers writeBuffers; + protected WriteBuffers.Position readPos; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashBytesKeyDuplicateReductionTable() { + super(); + } + + public VectorGroupByHashBytesKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + writeBuffers = keyStore.getWriteBuffers(); + readPos = new WriteBuffers.Position(); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + writeBuffers = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_DUPLICATE_REDUCTION_ENTRY_SIZE; + } + + protected static final int BYTES_DUPLICATE_REDUCTION_ENTRY_SIZE = 1; + + public void createOrIgnoreBytesDuplicateReductionKey(byte[] keyBytes, int keyStart, int keyLength, + long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + long refWord; + final long partialHashCode = + VectorHashKeyRef.extractPartialHashCode(hashCode); + while (true) { + refWord = slotMultiples[slot]; + if (refWord == 0) { + // First entry. + slotMultiples[slot] = keyStore.add(partialHashCode, keyBytes, keyStart, keyLength); + + keyCount++; + return; + } + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + partialHashCode && + VectorHashKeyRef.equalKey( + refWord, keyBytes, keyStart, keyLength, writeBuffers, readPos)) { + // Ignore. A duplicate has been eliminated. + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + } + + private int iterateIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + + protected int initBytesKeyIterator() { + iterateIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + return keyCount; + } + + // Read next key. 
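+  // Scans forward to the next occupied slot (an empty slot holds a zero ref word); the caller
+  // must invoke it at most keyCount times, as returned by initBytesKeyIterator.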
+  protected void readNext() {
+    while (true) {
+      final long keyRef = slotMultiples[iterateIndex];
+      if (keyRef != 0) {
+        keyStore.getKey(
+            keyRef,
+            keyByteSegmentRef,
+            keyReadPos);
+
+        iterateIndex++;
+        return;
+      }
+      iterateIndex++;
+    }
+  }
+
+  public byte[] getKeyBytes() {
+    return keyByteSegmentRef.getBytes();
+  }
+
+  public int getKeyBytesOffset() {
+    return (int) keyByteSegmentRef.getOffset();
+  }
+
+  public int getKeyBytesLength() {
+    return keyByteSegmentRef.getLength();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/VectorGroupByHashBytesKeyWordAggrTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/VectorGroupByHashBytesKeyWordAggrTable.java
new file mode 100644
index 0000000..6167f15
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/VectorGroupByHashBytesKeyWordAggrTable.java
@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.VectorGroupByHashLongKeyWordAggrTable.FlagsWord;
+import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef;
+import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef;
+import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
+
+/**
+ * A single bytes key hash table optimized for a single long word aggregate (word-aggr)
+ * Native Vector GROUP BY.
+ */
+public abstract class VectorGroupByHashBytesKeyWordAggrTable
+    extends VectorGroupByHashOperatorBase {
+
+  private static final long serialVersionUID = 1L;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+ //--------------------------------------------------------------------------- + + // Variables from most recent findLongKeyWord call: + protected boolean currentIsAggregationNeeded; + protected int currentPairIndex; + protected long currentLongWordAggr; + + private VectorKeyStore keyStore; + private WriteBuffers writeBuffers; + private WriteBuffers.Position readPos; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashBytesKeyWordAggrTable() { + super(); + } + + public VectorGroupByHashBytesKeyWordAggrTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + writeBuffers = keyStore.getWriteBuffers(); + readPos = new WriteBuffers.Position(); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + writeBuffers = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_ENTRY_SIZE; + } + + protected static final int BYTES_ENTRY_SIZE = 2; + + /* + * Create a hash table entry if it doesn't exist for a NULL word-aggr. If the entry already + * exists, ignore it. + */ + public void createOrIgnoreBytesKeyNullEntry(byte[] keyBytes, int keyStart, int keyLength, + long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int pairIndex; + long refWord; + final long partialHashCode = + VectorHashKeyRef.extractPartialHashCode(hashCode); + while (true) { + pairIndex = 2 * slot; + refWord = slotMultiples[pairIndex]; + if (refWord == 0) { + + // Create. + refWord = keyStore.add(partialHashCode, keyBytes, keyStart, keyLength); + slotMultiples[pairIndex] = refWord | KeyRef.Flag.flagOnMask; + + currentIsAggregationNeeded = false; + + keyCount++; + return; + } + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + partialHashCode && + VectorHashKeyRef.equalKey( + refWord, keyBytes, keyStart, keyLength, writeBuffers, readPos)) { + + // Ignore. + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + } + + /* + * For a new non-NULL word-aggr. + * + * Create a hash table entry if it doesn't exist. Save the input word-aggr. + * + * If the entry already exists and the current word-aggr is NULL, save the input word-aggr. + * + * Set currentIsAggregationNeeded flag to false for those cases. + * + * Otherwise, we have a current word-aggr that requires aggregation by the caller with the + * new word-aggr. Set currentIsAggregationNeeded flag to true and currentLongWordAggr to the + * current word-aggr. + * + * If the caller does compute a different word-aggr, then they call replaceLongWordAggr to replace + * the current word-aggr in the hash table. 
+ */ + public void findOrCreateBytesKeyWord(byte[] keyBytes, int keyStart, int keyLength, + long hashCode, long word) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int pairIndex; + long refWord; + final long partialHashCode = + VectorHashKeyRef.extractPartialHashCode(hashCode); + while (true) { + pairIndex = 2 * slot; + refWord = slotMultiples[pairIndex]; + if (refWord == 0) { + + // Create. + slotMultiples[pairIndex] = keyStore.add(partialHashCode, keyBytes, keyStart, keyLength); + slotMultiples[pairIndex + 1] = word; + + currentIsAggregationNeeded = false; + + keyCount++; + return; + } + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + partialHashCode && + VectorHashKeyRef.equalKey( + refWord, keyBytes, keyStart, keyLength, writeBuffers, readPos)) { + if (KeyRef.getFlag(refWord)) { + + // Turn off NULL flag. + slotMultiples[pairIndex] = refWord & KeyRef.Flag.flagOffMask; + slotMultiples[pairIndex + 1] = word; + currentIsAggregationNeeded = false; + } else { + currentPairIndex = pairIndex; + currentLongWordAggr = slotMultiples[pairIndex + 1]; + currentIsAggregationNeeded = true; + } + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + } + + public void replaceLongWordAggr(long word) { + slotMultiples[currentPairIndex + 1] = word; + } + + public void setLongWordAggrOverflow() { + + // Turn on OVERFLOW flag. + slotMultiples[currentPairIndex] |= KeyRef.Flag2.flagOnMask; + } + + private int iteratePairIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + protected long iterateRefWord; + protected boolean iterateIsNullWord; + protected long iterateWord; + + protected int initBytesKeyIterator() { + iteratePairIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + iterateRefWord = 0; + iterateIsNullWord = false; + iterateWord = 0; + return keyCount; + } + + // Read next key. + protected void readNext() { + while (true) { + final long refWord = slotMultiples[iteratePairIndex]; + if (refWord != 0) { + iterateRefWord = refWord; + keyStore.getKey( + refWord, + keyByteSegmentRef, + keyReadPos); + if (KeyRef.getFlag(refWord)) { + iterateIsNullWord = true; + } else { + iterateIsNullWord = false; + iterateWord = slotMultiples[iteratePairIndex + 1]; + } + + iteratePairIndex += 2; + return; + } + iteratePairIndex += 2; + } + } + + public byte[] getKeyBytes() { + return keyByteSegmentRef.getBytes(); + } + + public int getKeyBytesOffset() { + return (int) keyByteSegmentRef.getOffset(); + } + + public int getKeyBytesLength() { + return keyByteSegmentRef.getLength(); + } + + public boolean getIsOverflow() { + return KeyRef.getFlag2(iterateRefWord); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyCountTable.java new file mode 100644 index 0000000..b9c2f4e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyCountTable.java @@ -0,0 +1,298 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * A LONG key hash table optimized for single COUNT Native Vector GROUP BY. + */ +public abstract class VectorGroupByHashLongKeyCountTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + protected final int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashLongKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return LONG_NON_ZERO_COUNT_ENTRY_SIZE; + } + + protected static final int LONG_NON_ZERO_COUNT_ENTRY_SIZE = 2; + + public void findOrCreateLongKeyNonZeroCount(long key, long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int pairIndex = 0; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex + 1] == 0) { + isNewKey = true; + break; + } + if (key == slotMultiples[pairIndex]) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. 
+ probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + slotMultiples[pairIndex] = key; + keyCount++; + slotMultiples[pairIndex + 1] = count; + } else { + slotMultiples[pairIndex + 1] += count; + } + } + + private int nonZeroCountPairIndex; + private long currentNonZeroCount; + + protected int initLongNonZeroCountKeyIterator() { + nonZeroCountPairIndex = 0; + currentNonZeroCount = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNextNonZeroCountKey() { + while (true) { + long count = slotMultiples[nonZeroCountPairIndex + 1]; + if (count > 0) { + currentNonZeroCount = count; + long key = slotMultiples[nonZeroCountPairIndex]; + nonZeroCountPairIndex += 2; + return key; + } + nonZeroCountPairIndex += 2; + } + } + + public long getLongNonZeroCount() { + return currentNonZeroCount; + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one long key non-zero count hash table to the + * output. + */ + protected void outputLongNonZeroKeyAndCountPairs( + LongColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongNonZeroCountKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNextNonZeroCountKey(); + countVector[i] = getLongNonZeroCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } + + //------------------------------------------------------------------------------------------------ + + private static long LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK = 1L << 63; + + protected static int LONG_ZERO_COUNT_ENTRY_SIZE = 2; + + public void findOrCreateLongKeyZeroCount(long key, long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int pairIndex = 0; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex + 1] == 0) { + isNewKey = true; + break; + } + if (key == slotMultiples[pairIndex]) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + slotMultiples[pairIndex] = key; + keyCount++; + if (count == 0) { + slotMultiples[pairIndex + 1] = LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK; + } else { + slotMultiples[pairIndex + 1] = count; + } + } else if (count > 0) { + + // Only update count when we are leaving 0. 
+ if (slotMultiples[pairIndex + 1] == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) { + slotMultiples[pairIndex + 1] = count; + } else { + slotMultiples[pairIndex + 1] += count; + } + } + } + + private int countKeyPairIndex; + private long currentCountKeyCount; + + protected int initLongZeroCountKeyIterator() { + countKeyPairIndex = 0; + currentCountKeyCount = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNextZeroCountKey() { + while (true) { + long count = slotMultiples[countKeyPairIndex + 1]; + if (count != 0) { + if (count == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) { + currentCountKeyCount = 0; + } else { + currentCountKeyCount = count; + } + long key = slotMultiples[countKeyPairIndex]; + countKeyPairIndex += 2; + return key; + } + countKeyPairIndex += 2; + } + } + + public long getCount() { + return currentCountKeyCount; + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one long key zero count hash table to the + * output. + */ + protected void outputLongZeroCountKeyAndCountPairs( + LongColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongZeroCountKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + keyIsNull[batchIndex] = false; + keyVector[batchIndex] = getNextZeroCountKey(); + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyDuplicateReductionTable.java new file mode 100644 index 0000000..da80e24 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyDuplicateReductionTable.java @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * A single long key hash table optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashLongKeyDuplicateReductionTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected boolean haveZeroKey; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyDuplicateReductionTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashLongKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveZeroKey = false; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return LONG_DUPLICATE_REDUCTION_ENTRY_SIZE; + } + + protected static int LONG_DUPLICATE_REDUCTION_ENTRY_SIZE = 1; + + public void createOrIgnoreLongDuplicateReductionKey(long key, long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + while (true) { + if (slotMultiples[slot] == 0) { + break; + } + if (key == slotMultiples[slot]) { + // Found it! A duplicate has now been eliminated. + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + + // Create first-time key. + slotMultiples[slot] = key; + keyCount++; + } + + private int countKeyIndex; + + protected int initLongDuplicateReductionKeyIterator() { + countKeyIndex = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNext() { + while (true) { + long key = slotMultiples[countKeyIndex++]; + if (key != 0) { + return key; + } + } + } + + protected void doOutputLongKeys( + LongColumnVector keyColumnVector) throws HiveException { + + long[] keyVector = keyColumnVector.vector; + + if (haveZeroKey) { + + // Zero key to deal with. + + // Is the outputBatch already full? 
+ if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + keyVector[outputBatch.size++] = 0; + } + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongDuplicateReductionKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNext(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyWordAggrTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyWordAggrTable.java new file mode 100644 index 0000000..a564619 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/VectorGroupByHashLongKeyWordAggrTable.java @@ -0,0 +1,381 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * A single long key hash table optimized for a single word aggregation Native Vector GROUP BY. + */ +public abstract class VectorGroupByHashLongKeyWordAggrTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + protected final int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + // Variables from most recent findOrCreateLongKeyWord call: + protected boolean currentIsAggregationNeeded; + private int currentTripleIndex; + protected long currentLongWordAggr; + + //--------------------------------------------------------------------------- + // Pass-thru constructors.
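The long-key tables above use the raw slot word 0 to mean "empty slot", which forces two workarounds visible in this patch: the duplicate-reduction table tracks the genuine key 0 out of band (haveZeroKey), and the zero-count variant stores a high-bit sentinel (LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) so a key that is present with count 0 is still distinguishable from an empty slot. A minimal combined sketch follows; it is a hypothetical class with a toy hash (low bits only, no collision handling), not the patch's code.

```java
/** Hypothetical sketch of the two "zero means empty" workarounds. */
public class ZeroSentinelSketch {
  private static final long ZERO_HAS_VALUE_MASK = 1L << 63;

  private final long[] pairs = new long[2 * 1024]; // key/count pairs; 0 = empty
  private boolean haveZeroKey;                     // key 0 handled out of band

  public void add(long key, int count) {
    if (key == 0) {                  // slot value 0 marks an empty slot, so the
      haveZeroKey = true;            // real key 0 can never live in the array
      return;
    }
    int pairIndex = 2 * (int) (key & 1023);        // toy hash: low bits only
    if (pairs[pairIndex] == 0) {                   // empty slot: create
      pairs[pairIndex] = key;
      pairs[pairIndex + 1] = (count == 0) ? ZERO_HAS_VALUE_MASK : count;
    } else if (count > 0) {                        // only update when leaving 0
      long stored = pairs[pairIndex + 1];
      pairs[pairIndex + 1] =
          (stored == ZERO_HAS_VALUE_MASK) ? count : stored + count;
    }
  }

  public long decodeCount(long stored) {
    return (stored == ZERO_HAS_VALUE_MASK) ? 0 : stored;
  }

  public static void main(String[] args) {
    ZeroSentinelSketch t = new ZeroSentinelSketch();
    t.add(7L, 0);                    // present with count 0: sentinel stored
    t.add(7L, 3);                    // leaves zero: real count 3 replaces it
    t.add(0L, 1);                    // key 0 recorded via haveZeroKey
    System.out.println(t.decodeCount(t.pairs[2 * 7 + 1]) + " " + t.haveZeroKey);
  }
}
```

The sentinel keeps the hot emptiness test a single compare against 0 while still allowing both "key 0" and "count 0" to be representable.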
+ // + + public VectorGroupByHashLongKeyWordAggrTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashLongKeyWordAggrTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return LONG_WORD_ENTRY_SIZE; + } + + protected static final int LONG_WORD_ENTRY_SIZE = 3; + + public static final class FlagsWord { + + public static final class IsExistsFlag { + public static final int bitLength = 1; + public static final long flagOnMask = 1L; + } + + public static boolean getIsExistsFlag(long flagsWord) { + return (flagsWord & IsExistsFlag.flagOnMask) != 0; + } + + public static final class IsNullFlag { + public static final int bitLength = 1; + public static final int bitShift = IsExistsFlag.bitLength; + public static final long flagOnMask = 1L << bitShift; + } + + public static boolean getIsNullFlag(long flagsWord) { + return (flagsWord & IsNullFlag.flagOnMask) != 0; + } + + public static final class IsOverflowFlag { + public static final int bitShift = IsNullFlag.bitShift + IsNullFlag.bitLength; + public static final long flagOnMask = 1L << bitShift; + } + + public static final boolean getIsOverflowFlag(long flagsWord) { + return (flagsWord & IsOverflowFlag.flagOnMask) != 0; + } + + public static long existsAndIsNullMask = IsExistsFlag.flagOnMask | IsNullFlag.flagOnMask; + } + + /* + * Create a hash table entry if it doesn't exist for a NULL word-aggr. If the entry already + * exists, ignore it. + */ + public void createOrIgnoreLongKeyNullEntry(long key, long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int tripleIndex = 0; + while (true) { + tripleIndex = 3 * slot; + final long flagsWord = slotMultiples[tripleIndex + 1]; + if (flagsWord == 0) { + + // Create. + slotMultiples[tripleIndex + 1] = FlagsWord.existsAndIsNullMask; + + slotMultiples[tripleIndex] = key; + + keyCount++; + return; + } + if (key == slotMultiples[tripleIndex]) { + + // Ignore. + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + } + + /* + * For a new non-NULL word-aggr. + * + * Create a hash table entry if it doesn't exist. Save the input word-aggr. + * + * If the entry already exists and the current word-aggr is NULL, save the input word-aggr. + * + * Set currentIsAggregationNeeded flag to false for those cases. + * + * Otherwise, we have a current word-aggr that requires aggregation by the caller with the + * new word-aggr. Set currentIsAggregationNeeded flag to true and currentLongWordAggr to the + * current word-aggr. + * + * If the caller does compute a different word-aggr, then they call replaceLongWordAggr to replace + * the current word-aggr in the hash table. 
 */ + public void findOrCreateLongKeyWord(long key, long hashCode, long word) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int tripleIndex = 0; + while (true) { + tripleIndex = 3 * slot; + final long flagsWord = slotMultiples[tripleIndex + 1]; + if (flagsWord == 0) { + slotMultiples[tripleIndex + 1] = FlagsWord.IsExistsFlag.flagOnMask; + slotMultiples[tripleIndex + 2] = word; + + slotMultiples[tripleIndex] = key; + currentIsAggregationNeeded = false; + + keyCount++; + return; + } + if (key == slotMultiples[tripleIndex]) { + if ((flagsWord & FlagsWord.IsNullFlag.flagOnMask) != 0) { + + // Turn off NULL flag. + slotMultiples[tripleIndex + 1] = FlagsWord.IsExistsFlag.flagOnMask; + slotMultiples[tripleIndex + 2] = word; + currentIsAggregationNeeded = false; + } else { + currentTripleIndex = tripleIndex; + currentLongWordAggr = slotMultiples[tripleIndex + 2]; + currentIsAggregationNeeded = true; + } + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + } + + public void replaceLongWordAggr(long word) { + slotMultiples[currentTripleIndex + 2] = word; + } + + public void setLongWordAggrOverflow() { + + // Turn on OVERFLOW flag. + slotMultiples[currentTripleIndex + 1] |= FlagsWord.IsOverflowFlag.flagOnMask; + } + + private int iterateTripleIndex; + private long iterateFlagsWord; + private boolean iterateIsNullWord; + private long iterateWord; + + protected int initLongKeyWordIterator() { + iterateTripleIndex = 0; + iterateFlagsWord = 0; + iterateIsNullWord = false; + iterateWord = 0; + return keyCount; + } + + // Find next key and return it. + protected long getIterateNextLongKey() { + while (true) { + long flagsWord = slotMultiples[iterateTripleIndex + 1]; + if (flagsWord != 0) { + iterateFlagsWord = flagsWord; + if ((flagsWord & FlagsWord.IsNullFlag.flagOnMask) != 0) { + iterateIsNullWord = true; + } else { + iterateIsNullWord = false; + iterateWord = slotMultiples[iterateTripleIndex + 2]; + } + long key = slotMultiples[iterateTripleIndex]; + iterateTripleIndex += 3; + return key; + } + iterateTripleIndex += 3; + } + } + + public boolean getIsOverflow() { + return FlagsWord.getIsOverflowFlag(iterateFlagsWord); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and aggregate pairs of the one long key word aggregation hash table to + * the output. + */ + protected void outputLongKeyAndAggregatePairs( + LongColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and aggregate out of + // each slot entry and store in the output batch.
+ int keyCount = initLongKeyWordIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + keyIsNull[batchIndex] = false; + keyVector[batchIndex] = getIterateNextLongKey(); + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } + + protected void outputLongKeyAndDecimal64SumPairs( + LongColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongKeyWordIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + keyIsNull[batchIndex] = false; + keyVector[batchIndex] = getIterateNextLongKey(); + if (iterateIsNullWord || getIsOverflow()) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } + + protected void outputLongKeyAndAggregatePairs( + LongColumnVector keyColumnVector, + DoubleColumnVector aggregateColumnVector) throws HiveException { + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + double[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. 
+ int keyCount = initLongKeyWordIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + keyIsNull[batchIndex] = false; + keyVector[batchIndex] = getIterateNextLongKey(); + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/VectorGroupByHashMultiKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/VectorGroupByHashMultiKeyCountTable.java new file mode 100644 index 0000000..0b0bc02 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/VectorGroupByHashMultiKeyCountTable.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * A serialized multi-key hash table optimized for single COUNT Native Vector GROUP BY. + */ +public abstract class VectorGroupByHashMultiKeyCountTable + extends VectorGroupByHashBytesKeyCountTable { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors.
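The long-key word-aggr table packs entry state into the middle word of each [key, flags, aggregate] triple via the FlagsWord layout shown earlier: bit 0 marks an occupied slot (flags == 0 doubles as the empty marker, so EXISTS is always set on a live entry), bit 1 marks an aggregate that is still SQL NULL, and bit 2 records a DECIMAL_64 sum overflow. A self-contained sketch of the encode/decode round trip, mirroring those constants:

```java
/** Hypothetical constants mirroring the FlagsWord layout above. */
public class FlagsWordSketch {
  static final long EXISTS   = 1L;       // bit 0: slot is occupied
  static final long IS_NULL  = 1L << 1;  // bit 1: aggregate is SQL NULL so far
  static final long OVERFLOW = 1L << 2;  // bit 2: decimal64 sum overflowed

  public static void main(String[] args) {
    long[] triple = new long[3];         // key, flags, aggregate

    // Create an entry whose aggregate starts out NULL (e.g. SUM over NULL input).
    triple[0] = 42L;
    triple[1] = EXISTS | IS_NULL;

    // First non-NULL value arrives: clear the NULL bit, store the word.
    triple[1] = EXISTS;
    triple[2] = 17L;

    // A later checked add overflows: mark it; the output path will emit NULL.
    triple[1] |= OVERFLOW;

    System.out.println("null=" + ((triple[1] & IS_NULL) != 0)
        + " overflow=" + ((triple[1] & OVERFLOW) != 0));
  }
}
```

Keeping all three states in one word lets the probe loop test emptiness, NULL-ness, and overflow with single mask operations on data it already loaded.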
+ // + + public VectorGroupByHashMultiKeyCountTable() { + super(); + } + + public VectorGroupByHashMultiKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + keyVectorDeserializeRow = initMultiKeyDeserialize(); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string hash table to the output. + */ + protected void doOutputMultiKeyAndCounts() throws HiveException { + + final int keySize = groupByKeyExpressions.length; + LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[keySize]; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/VectorGroupByHashMultiKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/VectorGroupByHashMultiKeyDuplicateReductionTable.java new file mode 100644 index 0000000..eb66e44 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/VectorGroupByHashMultiKeyDuplicateReductionTable.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyDuplicateReductionTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * A serialized multi-key hash table optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashMultiKeyDuplicateReductionTable + extends VectorGroupByHashBytesKeyDuplicateReductionTable { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashMultiKeyDuplicateReductionTable() { + super(); + } + + public VectorGroupByHashMultiKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + keyVectorDeserializeRow = initMultiKeyDeserialize(); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the keys of the one multi-key hash table to the output. + */ + protected void doOutputMultiKeys() throws HiveException { + + // Use the iterator to race down the slot table array and get the bytes key out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc.
+ keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/VectorGroupByHashMultiKeyWordAggrTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/VectorGroupByHashMultiKeyWordAggrTable.java new file mode 100644 index 0000000..95b3555 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/VectorGroupByHashMultiKeyWordAggrTable.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyCountTable; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyWordAggrTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * A serialized multi-key hash table optimized for a single word aggregation Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashMultiKeyWordAggrTable + extends VectorGroupByHashBytesKeyWordAggrTable { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors.
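The word-aggr tables deliberately do not know how to combine values: findOrCreate*KeyWord either installs the new word itself or hands the existing word back through currentIsAggregationNeeded and currentLongWordAggr, and the caller writes the combined result back with replaceLongWordAggr (flagging setLongWordAggrOverflow for a failed DECIMAL_64 sum). The sketch below is a hypothetical, self-contained stand-in for that protocol, using a HashMap instead of the open-addressing table; the MAX logic is illustrative, not code from the patch.

```java
import java.util.HashMap;
import java.util.Map;

/** Hypothetical sketch of the find/aggregate/replace caller protocol. */
public class WordAggrProtocolSketch {
  private final Map<Long, long[]> table = new HashMap<>(); // { value, overflowFlag }
  boolean currentIsAggregationNeeded;
  long currentLongWordAggr;
  private long[] currentEntry;

  void findOrCreateLongKeyWord(long key, long word) {
    long[] entry = table.get(key);
    if (entry == null) {
      table.put(key, new long[] { word, 0 });   // create: no aggregation needed
      currentIsAggregationNeeded = false;
    } else {
      currentEntry = entry;                     // exists: hand old word to caller
      currentLongWordAggr = entry[0];
      currentIsAggregationNeeded = true;
    }
  }

  void replaceLongWordAggr(long word) { currentEntry[0] = word; }
  void setLongWordAggrOverflow() { currentEntry[1] = 1; }

  public static void main(String[] args) {
    WordAggrProtocolSketch t = new WordAggrProtocolSketch();
    long[] input = { 5, 3, 9 };
    for (long v : input) {                      // MAX aggregation for key 1
      t.findOrCreateLongKeyWord(1L, v);
      if (t.currentIsAggregationNeeded) {
        long max = Math.max(t.currentLongWordAggr, v);
        if (max != t.currentLongWordAggr) {
          t.replaceLongWordAggr(max);           // write back only on change
        }
      }
    }
    System.out.println(t.table.get(1L)[0]);     // prints 9
  }
}
```

A SUM caller would combine with a checked add instead (for example Math.addExact) and invoke setLongWordAggrOverflow when the add throws, which is how the OVERFLOW flag read by the output paths gets set.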
+ // + + public VectorGroupByHashMultiKeyWordAggrTable() { + super(); + } + + public VectorGroupByHashMultiKeyWordAggrTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + keyVectorDeserializeRow = initMultiKeyDeserialize(); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and aggregate pairs of the one multi-key hash table to the output. + */ + protected void doOutputMultiKeyAndAggregatePairs() throws HiveException { + + final int keySize = groupByKeyExpressions.length; + LongColumnVector aggregateColumnVector = (LongColumnVector) outputBatch.cols[keySize]; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and aggregate out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } + + protected void doOutputMultiKeyAndDecimal64SumPairs() throws HiveException { + + final int keySize = groupByKeyExpressions.length; + LongColumnVector aggregateColumnVector = (LongColumnVector) outputBatch.cols[keySize]; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and aggregate out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc.
+ keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + + // For DECIMAL_64 SUM, we must check for overflow. + if (iterateIsNullWord || getIsOverflow()) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/VectorGroupByHashSingleKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/VectorGroupByHashSingleKeyCountTable.java new file mode 100644 index 0000000..9314be3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/VectorGroupByHashSingleKeyCountTable.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * A single serialized key hash table optimized for single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashSingleKeyCountTable + extends VectorGroupByHashBytesKeyCountTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors.
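The DECIMAL_64 sum path just shown emits SQL NULL for any group whose accumulated sum overflowed, which is why the aggregation side flags overflow instead of wrapping silently. A small sketch of that accumulate-then-NULL shape, assuming values are kept as unscaled longs as in DECIMAL_64 storage; the overflow check via Math.addExact is an illustrative stand-in, since the patch's actual check happens on the aggregation side before setLongWordAggrOverflow is called.

```java
/** Hypothetical sketch of checked unscaled-long accumulation. */
public class Decimal64SumSketch {
  public static void main(String[] args) {
    long sum = 0;
    boolean overflow = false;
    long[] unscaled = { 355302L, 442000L, Long.MAX_VALUE };  // e.g. scale 4

    for (long v : unscaled) {
      try {
        sum = Math.addExact(sum, v);    // throws instead of silently wrapping
      } catch (ArithmeticException e) {
        overflow = true;                // mirrors setLongWordAggrOverflow()
        break;
      }
    }
    // Output path: an overflowed sum becomes SQL NULL, as in
    // doOutputMultiKeyAndDecimal64SumPairs above.
    System.out.println(overflow ? "NULL" : Long.toString(sum));
  }
}
```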
+ // + + public VectorGroupByHashSingleKeyCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashSingleKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + keyVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + typeInfos, + /* useExternalBuffer */ true)); + // Single key is output column 0. + keyVectorDeserializeRow.init(new int[] { 0 }); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string hash table to the output. + */ + protected void doOutputSingleKeyAndCountPairs( + ColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/VectorGroupByHashSingleKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/VectorGroupByHashSingleKeyDuplicateReductionTable.java new file mode 100644 index 0000000..2be6cc6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/VectorGroupByHashSingleKeyDuplicateReductionTable.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyDuplicateReductionTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * A single serialized key hash table optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashSingleKeyDuplicateReductionTable + extends VectorGroupByHashBytesKeyDuplicateReductionTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashSingleKeyDuplicateReductionTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashSingleKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + keyVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + typeInfos, + /* useExternalBuffer */ true)); + // Single key is output column 0. + keyVectorDeserializeRow.init(new int[] { 0 }); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the keys of the one single-key hash table to the output. + */ + protected void doOutputSerializeKeys( + ColumnVector keyColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + + // Use the iterator to race down the slot table array and get the bytes key out of + // each slot entry and store in the output batch.
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/VectorGroupByHashSingleKeyWordAggrTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/VectorGroupByHashSingleKeyWordAggrTable.java new file mode 100644 index 0000000..58059ed --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/VectorGroupByHashSingleKeyWordAggrTable.java @@ -0,0 +1,277 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyWordAggrTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * A single serialized key hash table optimized for word aggregation Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashSingleKeyWordAggrTable + extends VectorGroupByHashBytesKeyWordAggrTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient.
+ //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashSingleKeyWordAggrTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashSingleKeyWordAggrTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + keyVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + typeInfos, + /* useExternalBuffer */ true)); + // Single key is output column 0. + keyVectorDeserializeRow.init(new int[] { 0 }); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and aggregate pairs of the single-key hash table to the output. + */ + protected void doOutputSingleKeyAndAggregatePairs( + ColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } + + protected void doOutputSingleKeyAndDecimal64SumPairs( + ColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch.
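+ // Unlike the plain word-aggregate flush above, this DECIMAL_64 SUM flush also + // consults getIsOverflow() and emits a NULL aggregate for any key whose + // running sum overflowed.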
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + + // For DECIMAL_64 SUM, we must check for overflow. + if (iterateIsNullWord || getIsOverflow()) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } + + protected void doOutputSingleKeyAndAggregatePairs( + ColumnVector keyColumnVector, + DoubleColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + double[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } + + protected void doOutputSingleKeyAndDecimal64SumPairs( + ColumnVector keyColumnVector, + DoubleColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + double[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. 
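+ // DoubleColumnVector variant of the DECIMAL_64 SUM flush; the control flow is + // identical, including the overflow check.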
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + if (iterateIsNullWord || getIsOverflow()) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/VectorGroupByHashStringKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/VectorGroupByHashStringKeyCountTable.java new file mode 100644 index 0000000..7dbdd53 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/VectorGroupByHashStringKeyCountTable.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * A single string key hash table optimized for single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashStringKeyCountTable + extends VectorGroupByHashBytesKeyCountTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient.
+ //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashStringKeyCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashStringKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string key hash table to the + * output. + */ + protected void doOutputStringKeyAndCountPairs( + BytesColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyColumnVector.setRef( + batchIndex, + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/VectorGroupByHashStringKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/VectorGroupByHashStringKeyDuplicateReductionTable.java new file mode 100644 index 0000000..850c4fb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/VectorGroupByHashStringKeyDuplicateReductionTable.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyDuplicateReductionTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * A single string key hash table optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashStringKeyDuplicateReductionTable + extends VectorGroupByHashBytesKeyDuplicateReductionTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashStringKeyDuplicateReductionTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashStringKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the keys of the one string key hash table to the + * output. + */ + protected void doOutputStringKeys( + BytesColumnVector keyColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + + // Use the iterator to race down the slot table array and get the bytes key out of + // each slot entry and store it in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyColumnVector.setRef( + batchIndex, + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/VectorGroupByHashStringKeyWordAggrTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/VectorGroupByHashStringKeyWordAggrTable.java new file mode 100644 index 0000000..8614342 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/VectorGroupByHashStringKeyWordAggrTable.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.VectorGroupByHashBytesKeyWordAggrTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * A single string key hash table optimized for word aggregation Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashStringKeyWordAggrTable + extends VectorGroupByHashBytesKeyWordAggrTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashStringKeyWordAggrTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashStringKeyWordAggrTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and aggregate pairs of the one string key hash table to the + * output. + */ + protected void doOutputStringKeyAndAggregatePairs( + BytesColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch.
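+ // String keys are placed into the output batch by reference (BytesColumnVector.setRef) + // rather than copied; the hash table is immutable while flushing, so the references + // remain valid.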
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyColumnVector.setRef( + batchIndex, + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } + + protected void doOutputStringKeyAndDecimal64SumPairs( + BytesColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyColumnVector.setRef( + batchIndex, + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + // For DECIMAL_64 SUM, we must check for overflow. + if (iterateIsNullWord || getIsOverflow()) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/hashkeyref/VectorHashKeyRef.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/hashkeyref/VectorHashKeyRef.java new file mode 100644 index 0000000..26835a2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/hashkeyref/VectorHashKeyRef.java @@ -0,0 +1,189 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.hashkeyref; + +import org.apache.hadoop.hive.serde2.WriteBuffers; +// import com.google.common.base.Preconditions; + +public class VectorHashKeyRef { + + public static boolean equalKey(long refWord, byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers writeBuffers, WriteBuffers.Position readPos) { + + // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); + + final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + + writeBuffers.setReadPoint(absoluteOffset, readPos); + + int actualKeyLength = KeyRef.getSmallKeyLength(refWord); + boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); + if (!isKeyLengthSmall) { + + // And, if current value is big we must read it. + actualKeyLength = writeBuffers.readVInt(readPos); + } + + if (actualKeyLength != keyLength) { + return false; + } + + // Our reading was positioned to the key. + if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { + return false; + } + + return true; + } + + public static int calculateHashCode(long refWord, WriteBuffers writeBuffers, + WriteBuffers.Position readPos) { + + // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); + + final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + + int actualKeyLength = KeyRef.getSmallKeyLength(refWord); + boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); + final long keyAbsoluteOffset; + if (!isKeyLengthSmall) { + + // Position after next relative offset (fixed length) to the key. + writeBuffers.setReadPoint(absoluteOffset, readPos); + + // And, if current value is big we must read it. + actualKeyLength = writeBuffers.readVInt(readPos); + keyAbsoluteOffset = absoluteOffset + actualKeyLength; + } else { + keyAbsoluteOffset = absoluteOffset; + } + + return writeBuffers.unsafeHashCode(keyAbsoluteOffset, actualKeyLength); + } + + public static final class KeyRef { + + // Lowest field. + public static final class PartialHashCode { + public static final int bitLength = 15; + public static final long allBitsOn = (1L << bitLength) - 1; + public static final long bitMask = allBitsOn; + + // Choose the high bits of the hash code KNOWING it was calculated as an int. + // + // We want the partial hash code to be different than the + // lower bits used for our hash table slot calculations. + public static final int intChooseBitShift = Integer.SIZE - bitLength; + } + + public static long getPartialHashCode(long refWord) { + // No shift needed since this is the lowest field. + return refWord & PartialHashCode.bitMask; + } + + // Can make the 64 bit reference non-zero if this is non-zero. E.g. for hash map and + // hash multi-set, the offset is to the first key which is always preceded by a 5 byte next + // relative value offset or 4 byte count. + public static final class AbsoluteOffset { + public static final int bitLength = 39; + public static final int byteLength = (bitLength + Byte.SIZE -1) / Byte.SIZE; + public static final long allBitsOn = (1L << bitLength) - 1; + public static final int bitShift = PartialHashCode.bitLength; + public static final long bitMask = ((long) allBitsOn) << bitShift; + + // Make it a power of 2. 
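+ // 1L << (bitLength - 2) is 2^37; the largest encodable absolute offset is + // (1L << 39) - 1, so the maximum store size sits well inside the field's range.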
+ public static final long maxSize = 1L << (bitLength - 2); + } + + public static long getAbsoluteOffset(long refWord) { + return (refWord & KeyRef.AbsoluteOffset.bitMask) >> AbsoluteOffset.bitShift; + } + + // When this field equals SmallKeyLength.allBitsOn, the key length is serialized at the + // beginning of the key. + public static final class SmallKeyLength { + public static final int bitLength = 7; + public static final int allBitsOn = (1 << bitLength) - 1; + public static final int threshold = allBitsOn; + public static final int bitShift = AbsoluteOffset.bitShift + AbsoluteOffset.bitLength; + public static final long bitMask = ((long) allBitsOn) << bitShift; + public static final long allBitsOnBitShifted = ((long) allBitsOn) << bitShift; + } + + public static int getSmallKeyLength(long refWord) { + return (int) ((refWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); + } + + public static final class Flag { + public static final int bitLength = 1; + public static final int bitShift = SmallKeyLength.bitShift + SmallKeyLength.bitLength; + public static final long flagOnMask = 1L << bitShift; + public static final long flagOffMask = ~flagOnMask; + } + + public static boolean getFlag(long refWord) { + return (refWord & Flag.flagOnMask) != 0; + } + + public static final class Flag2 { + public static final int bitShift = Flag.bitShift + Flag.bitLength; + public static final long flagOnMask = 1L << bitShift; + public static final long flagOffMask = ~flagOnMask; + } + + public static boolean getFlag2(long refWord) { + return (refWord & Flag2.flagOnMask) != 0; + } + + // This bit should not be on for valid value references. We use -1 for a no value marker. + public static final class IsInvalidFlag { + public static final int bitShift = 63; + public static final long flagOnMask = 1L << bitShift; + } + + public static boolean getIsInvalidFlag(long refWord) { + return (refWord & IsInvalidFlag.flagOnMask) != 0; + } + } + + + /** + * Extract partial hash code from the full hash code. + * + * Choose the high bits of the hash code KNOWING it was calculated as an int. + * + * We want the partial hash code to be different than the + * lower bits used for our hash table slot calculations. + * + * @param hashCode + * @return + */ + public static long extractPartialHashCode(long hashCode) { + return (hashCode >>> KeyRef.PartialHashCode.intChooseBitShift) & KeyRef.PartialHashCode.bitMask; + } + + /** + * Get partial hash code from the reference word. + * @param refWord + * @return + */ + public static long getPartialHashCodeFromRefWord(long refWord) { + return KeyRef.getPartialHashCode(refWord); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java new file mode 100644 index 0000000..2932d26 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.keystore; + +import org.apache.hadoop.hive.common.MemoryEstimate; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/** + * Optimized for sequential key lookup. + */ + +public class VectorKeyStore implements MemoryEstimate { + + protected WriteBuffers writeBuffers; + + public WriteBuffers getWriteBuffers() { + return writeBuffers; + } + + /** + * A store for bytes keys in memory. + * + * The memory is an "infinite" byte array as a WriteBuffers object. + * + * We give the client a 64-bit key reference to keep that + * has the offset within the "infinite" byte array of the key. The 64 bits include about half + * of the upper hash code to help during matching. + * + * We optimize the common case when the key length is short and store that information in the + * 64 bit reference. + * + * Cases: + * + * 1) One element, when the key is small (its length is stored in the reference word): + * + * Key Reference + * | + * | absoluteOffset + * | + * | + * v + * + * KEY + * + * 2) One element, general: shows optional big key length. + * + * Key Reference + * | + * | absoluteOffset + * | + * | + * v + * [Big Key Length] + * optional KEY + */ + + /** + * The 64-bit long result is the key reference. + * @param partialHashCode + * @param keyBytes + * @param keyStart + * @param keyLength + */ + public long add(long partialHashCode, byte[] keyBytes, int keyStart, int keyLength) { + + // NOTE: In order to guarantee the reference word is non-zero, we write one pad byte in + // the constructor so absolute offset is non-zero. + final long absoluteOffset = writeBuffers.getWritePoint(); + + boolean isKeyLengthBig = (keyLength >= KeyRef.SmallKeyLength.threshold); + if (isKeyLengthBig) { + writeBuffers.writeVInt(keyLength); + } + writeBuffers.write(keyBytes, keyStart, keyLength); + + /* + * Form 64 bit key reference. + */ + long refWord = partialHashCode; + + refWord |= absoluteOffset << KeyRef.AbsoluteOffset.bitShift; + + if (isKeyLengthBig) { + refWord |= KeyRef.SmallKeyLength.allBitsOnBitShifted; + } else { + refWord |= ((long) keyLength) << KeyRef.SmallKeyLength.bitShift; + } + + // Preconditions.checkState(!KeyRef.getIsInvalidFlag(refWord)); + + return refWord; + } + + public VectorKeyStore(int writeBuffersSize) { + writeBuffers = new WriteBuffers(writeBuffersSize, KeyRef.AbsoluteOffset.maxSize); + + // NOTE: In order to guarantee the reference word is non-zero, we write one pad byte. + long offset = writeBuffers.getWritePoint(); + if (offset != 0) { + throw new RuntimeException("Expected to be at offset 0"); + } + writeBuffers.write(0); + } + + public void clear() { + writeBuffers.clear(); + } + + /* + * Get a key from the store given a key reference. + * The supplied readPos makes the read safe for shared-memory usage.
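+ * When the key was stored as small, its length is recovered from the reference word; + * otherwise the VInt length written ahead of the key bytes is read first.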
+ */ + public void getKey(long refWord, ByteSegmentRef keyByteSegmentRef, + WriteBuffers.Position readPos) { + + int storedKeyLength = KeyRef.getSmallKeyLength(refWord); + boolean isKeyLengthSmall = (storedKeyLength != KeyRef.SmallKeyLength.allBitsOn); + + long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + + writeBuffers.setReadPoint(absoluteOffset, readPos); + if (!isKeyLengthSmall) { + // Read big value length we wrote with the value. + storedKeyLength = writeBuffers.readVInt(readPos); + } + writeBuffers.getByteSegmentRefToCurrent(keyByteSegmentRef, storedKeyLength, readPos); + } + + @Override + public long getEstimatedMemorySize() { + long size = 0; + size += writeBuffers == null ? 0 : writeBuffers.getEstimatedMemorySize(); + return size; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java deleted file mode 100644 index dbfe518..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java +++ /dev/null @@ -1,178 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; - -import org.apache.hadoop.hive.serde2.WriteBuffers; -// import com.google.common.base.Preconditions; - -public class VectorMapJoinFastBytesHashKeyRef { - - public static boolean equalKey(long refWord, byte[] keyBytes, int keyStart, int keyLength, - WriteBuffers writeBuffers, WriteBuffers.Position readPos) { - - // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); - - final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); - - writeBuffers.setReadPoint(absoluteOffset, readPos); - - int actualKeyLength = KeyRef.getSmallKeyLength(refWord); - boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); - if (!isKeyLengthSmall) { - - // And, if current value is big we must read it. - actualKeyLength = writeBuffers.readVInt(readPos); - } - - if (actualKeyLength != keyLength) { - return false; - } - - // Our reading was positioned to the key. 
- if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { - return false; - } - - return true; - } - - public static int calculateHashCode(long refWord, WriteBuffers writeBuffers, - WriteBuffers.Position readPos) { - - // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); - - final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); - - int actualKeyLength = KeyRef.getSmallKeyLength(refWord); - boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); - final long keyAbsoluteOffset; - if (!isKeyLengthSmall) { - - // Position after next relative offset (fixed length) to the key. - writeBuffers.setReadPoint(absoluteOffset, readPos); - - // And, if current value is big we must read it. - actualKeyLength = writeBuffers.readVInt(readPos); - keyAbsoluteOffset = absoluteOffset + actualKeyLength; - } else { - keyAbsoluteOffset = absoluteOffset; - } - - return writeBuffers.unsafeHashCode(keyAbsoluteOffset, actualKeyLength); - } - - public static final class KeyRef { - - // Lowest field. - public static final class PartialHashCode { - public static final int bitLength = 15; - public static final long allBitsOn = (1L << bitLength) - 1; - public static final long bitMask = allBitsOn; - - // Choose the high bits of the hash code KNOWING it was calculated as an int. - // - // We want the partial hash code to be different than the - // lower bits used for our hash table slot calculations. - public static final int intChooseBitShift = Integer.SIZE - bitLength; - } - - public static long getPartialHashCode(long refWord) { - // No shift needed since this is the lowest field. - return refWord & PartialHashCode.bitMask; - } - - // Can make the 64 bit reference non-zero if this is non-zero. E.g. for hash map and - // hash multi-set, the offset is to the first key which is always preceded by a 5 byte next - // relative value offset or 4 byte count. - public static final class AbsoluteOffset { - public static final int bitLength = 39; - public static final int byteLength = (bitLength + Byte.SIZE -1) / Byte.SIZE; - public static final long allBitsOn = (1L << bitLength) - 1; - public static final int bitShift = PartialHashCode.bitLength; - public static final long bitMask = ((long) allBitsOn) << bitShift; - - // Make it a power of 2. - public static final long maxSize = 1L << (bitLength - 2); - } - - public static long getAbsoluteOffset(long refWord) { - return (refWord & KeyRef.AbsoluteOffset.bitMask) >> AbsoluteOffset.bitShift; - } - - // When this field equals SmallKeyLength.allBitsOn, the key length is serialized at the - // beginning of the key. 
- public static final class SmallKeyLength { - public static final int bitLength = 8; - public static final int allBitsOn = (1 << bitLength) - 1; - public static final int threshold = allBitsOn; - public static final int bitShift = AbsoluteOffset.bitShift + AbsoluteOffset.bitLength; - public static final long bitMask = ((long) allBitsOn) << bitShift; - public static final long allBitsOnBitShifted = ((long) allBitsOn) << bitShift; - } - - public static int getSmallKeyLength(long refWord) { - return (int) ((refWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); - } - - public static final class IsSingleFlag { - public static final int bitShift = SmallKeyLength.bitShift + SmallKeyLength.bitLength; - public static final long flagOnMask = 1L << bitShift; - public static final long flagOffMask = ~flagOnMask; - } - - public static boolean getIsSingleFlag(long refWord) { - return (refWord & IsSingleFlag.flagOnMask) != 0; - } - - // This bit should not be on for valid value references. We use -1 for a no value marker. - public static final class IsInvalidFlag { - public static final int bitShift = 63; - public static final long flagOnMask = 1L << bitShift; - } - - public static boolean getIsInvalidFlag(long refWord) { - return (refWord & IsInvalidFlag.flagOnMask) != 0; - } - } - - - /** - * Extract partial hash code from the full hash code. - * - * Choose the high bits of the hash code KNOWING it was calculated as an int. - * - * We want the partial hash code to be different than the - * lower bits used for our hash table slot calculations. - * - * @param hashCode - * @return - */ - public static long extractPartialHashCode(long hashCode) { - return (hashCode >>> KeyRef.PartialHashCode.intChooseBitShift) & KeyRef.PartialHashCode.bitMask; - } - - /** - * Get partial hash code from the reference word. 
- * @param hashCode - * @return - */ - public static long getPartialHashCodeFromRefWord(long refWord) { - return KeyRef.getPartialHashCode(refWord); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index add8b9c..43c12b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -20,6 +20,8 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; @@ -166,16 +168,16 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr boolean isNewKey; long refWord; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { refWord = slots[slot]; if (refWord == 0) { isNewKey = true; break; } - if (VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode && - VectorMapJoinFastBytesHashKeyRef.equalKey( + VectorHashKeyRef.equalKey( refWord, keyBytes, keyStart, keyLength, writeBuffers, unsafeReadPos)) { isNewKey = false; break; @@ -259,7 +261,7 @@ protected final int doHashMapMatch( long probeSlot = slot; int i = 0; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { final long refWord = slots[slot]; if (refWord == 0) { @@ -267,7 +269,7 @@ protected final int doHashMapMatch( // Given that we do not delete, an empty slot means no match. 
return -1; } else if ( - VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode) { // Finally, verify the key bytes match and remember read positions, etc in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMapStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMapStore.java index b71ebb6..c253988 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMapStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMapStore.java @@ -20,7 +20,7 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastBytesHashKeyRef.KeyRef; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; @@ -264,7 +264,7 @@ public boolean equalKey(byte[] keyBytes, int keyStart, int keyLength) { */ public void setMatch() { hasRows = true; - isSingleRow = KeyRef.getIsSingleFlag(refWord); + isSingleRow = KeyRef.getFlag(refWord); // We must set the position since equalKey does not leave us positioned correctly. hashMapStore.writeBuffers.setReadPoint( @@ -490,7 +490,7 @@ public long addFirst(long partialHashCode, byte[] keyBytes, int keyStart, int ke refWord |= ((long) keyLength) << KeyRef.SmallKeyLength.bitShift; } - refWord |= KeyRef.IsSingleFlag.flagOnMask; + refWord |= KeyRef.Flag.flagOnMask; // Preconditions.checkState(!KeyRef.getIsInvalidFlag(refWord)); @@ -517,10 +517,10 @@ public long addMore(long refWord, byte[] valueBytes, int valueStart, int valueLe // Where the new value record will be written. long nextAbsoluteValueOffset = writeBuffers.getWritePoint(); - if (KeyRef.getIsSingleFlag(refWord)) { + if (KeyRef.getFlag(refWord)) { // Mark reference as having more than 1 value. - refWord &= KeyRef.IsSingleFlag.flagOffMask; + refWord &= KeyRef.Flag.flagOffMask; // Write zeros to indicate no 3rd record. 
writeBuffers.write(RelativeOffset.zeroPadding); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 5ec90b4..f3e09e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -23,6 +23,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -63,16 +65,16 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr boolean isNewKey; long refWord; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { refWord = slots[slot]; if (refWord == 0) { isNewKey = true; break; } - if (VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode && - VectorMapJoinFastBytesHashKeyRef.equalKey( + VectorHashKeyRef.equalKey( refWord, keyBytes, keyStart, keyLength, writeBuffers, unsafeReadPos)) { isNewKey = false; break; @@ -132,7 +134,7 @@ protected final void doHashMultiSetContains( long probeSlot = slot; int i = 0; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { final long refWord = slots[slot]; if (refWord == 0) { @@ -140,7 +142,7 @@ protected final void doHashMultiSetContains( // Given that we do not delete, an empty slot means no match. return; } else if ( - VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode) { // Finally, verify the key bytes match and remember the set membership count in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSetStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSetStore.java index 20fa03a..a98901e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSetStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSetStore.java @@ -20,7 +20,7 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastBytesHashKeyRef.KeyRef; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.Position; @@ -163,7 +163,7 @@ public boolean equalKey(byte[] keyBytes, int keyStart, int keyLength) { * if necessary. 
*/ public void setContains() { - isSingleCount = KeyRef.getIsSingleFlag(refWord); + isSingleCount = KeyRef.getFlag(refWord); if (isSingleCount) { count = 1; @@ -227,7 +227,7 @@ public long addFirst(long partialHashCode, byte[] keyBytes, int keyStart, int ke refWord |= ((long) keyLength) << KeyRef.SmallKeyLength.bitShift; } - refWord |= KeyRef.IsSingleFlag.flagOnMask; + refWord |= KeyRef.Flag.flagOnMask; // Preconditions.checkState(!KeyRef.getIsInvalidFlag(refWord)); @@ -252,7 +252,7 @@ public long bumpCount(long refWord, WriteBuffers.Position unsafeReadPos) { countAbsoluteOffset, unsafeReadPos); // Mark reference as having more than 1 as the count. - refWord &= KeyRef.IsSingleFlag.flagOffMask; + refWord &= KeyRef.Flag.flagOffMask; // Save current write position. final long saveAbsoluteOffset = writeBuffers.getWritePoint(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 7c73aa6..ae5290e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -21,6 +21,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; import org.apache.hadoop.io.BytesWritable; @@ -58,16 +60,16 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr boolean isNewKey; long refWord; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { refWord = slots[slot]; if (refWord == 0) { isNewKey = true; break; } - if (VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode && - VectorMapJoinFastBytesHashKeyRef.equalKey( + VectorHashKeyRef.equalKey( refWord, keyBytes, keyStart, keyLength, writeBuffers, unsafeReadPos)) { isNewKey = false; break; @@ -123,7 +125,7 @@ protected final void doHashSetContains( long probeSlot = slot; int i = 0; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { final long refWord = slots[slot]; if (refWord == 0) { @@ -131,7 +133,7 @@ protected final void doHashSetContains( // Given that we do not delete, an empty slot means no match. 
return; } else if ( - VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode) { // Finally, verify the key bytes match and implicitly remember the set existence in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSetStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSetStore.java index 1a78688..6370b21 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSetStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSetStore.java @@ -20,7 +20,8 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastBytesHashKeyRef.KeyRef; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.Position; @@ -31,50 +32,7 @@ * Used by VectorMapJoinFastBytesHashSet to store the key and count for a hash set with * a bytes key. */ -public class VectorMapJoinFastBytesHashSetStore implements MemoryEstimate { - - private WriteBuffers writeBuffers; - - /** - * A store for a bytes key for a hash set in memory. - * - * The memory is a "infinite" byte array as a WriteBuffers object. - * - * We give the client (e.g. hash set logic) a 64-bit key and count reference to keep that - * has the offset within the "infinite" byte array of the key. The 64 bits includes about half - * of the upper hash code to help during matching. - * - * We optimize the common case when the key length is short and store that information in the - * 64 bit reference. - * - * Cases: - * - * 1) One element when key and is small (and stored in the reference word): - * - * Key and Value Reference - * | - * | absoluteOffset - * | - * | - * v - * - * KEY - * - * 2) One element, general: shows optional big key length. - * - * Key and Value Reference - * | - * | absoluteOffset - * | - * | - * v - * [Big Key Length] - * optional KEY - */ - - public WriteBuffers getWriteBuffers() { - return writeBuffers; - } +public class VectorMapJoinFastBytesHashSetStore extends VectorKeyStore { /** * A hash set result for the key. @@ -164,56 +122,7 @@ public String toString() { } } - /** - * Two 64-bit long result is the key and value reference. - * @param partialHashCode - * @param keyBytes - * @param keyStart - * @param keyLength - */ - public long add(long partialHashCode, byte[] keyBytes, int keyStart, int keyLength) { - - // We require the absolute offset to be non-zero so the 64 key and value reference is non-zero. - // So, we make it the offset after the relative offset and to the key. - final long absoluteOffset = writeBuffers.getWritePoint(); - - // NOTE: In order to guarantee the reference word is non-zero, later we will set the - // NOTE: single flag. - - boolean isKeyLengthBig = (keyLength >= KeyRef.SmallKeyLength.threshold); - if (isKeyLengthBig) { - writeBuffers.writeVInt(keyLength); - } - writeBuffers.write(keyBytes, keyStart, keyLength); - - /* - * Form 64 bit key and value reference. 
- */ - long refWord = partialHashCode; - - refWord |= absoluteOffset << KeyRef.AbsoluteOffset.bitShift; - - if (isKeyLengthBig) { - refWord |= KeyRef.SmallKeyLength.allBitsOnBitShifted; - } else { - refWord |= ((long) keyLength) << KeyRef.SmallKeyLength.bitShift; - } - - refWord |= KeyRef.IsSingleFlag.flagOnMask; - - // Preconditions.checkState(!KeyRef.getIsInvalidFlag(refWord)); - - return refWord; - } - public VectorMapJoinFastBytesHashSetStore(int writeBuffersSize) { - writeBuffers = new WriteBuffers(writeBuffersSize, KeyRef.AbsoluteOffset.maxSize); - } - - @Override - public long getEstimatedMemorySize() { - long size = 0; - size += writeBuffers == null ? 0 : writeBuffers.getEstimatedMemorySize(); - return size; + super(writeBuffersSize); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index 3d45a54..11214e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashTable; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.WriteBuffers; @@ -73,7 +75,7 @@ protected void expandAndRehash() { final long refWord = slots[slot]; if (refWord != 0) { final long hashCode = - VectorMapJoinFastBytesHashKeyRef.calculateHashCode( + VectorHashKeyRef.calculateHashCode( refWord, writeBuffers, unsafeReadPos); // Copy to new slot table. 
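The hash tables above all hinge on one packing trick: each slot is a single 64-bit "reference word" that carries a partial hash code, the key's absolute offset into the WriteBuffers byte store, a small-key length, and an is-single flag (which also guarantees the word is non-zero, so zero can mean "empty slot"). A minimal standalone sketch of that idea follows; the field widths and positions here are illustrative assumptions, not the actual VectorHashKeyRef.KeyRef layout:

    // Sketch only: pack/unpack a 64-bit key reference word.
    // All widths below are assumed for illustration.
    public final class RefWordSketch {

      private static final int OFFSET_BITS = 40;  // assumed WriteBuffers offset width
      private static final int LENGTH_BITS = 8;   // assumed small-key-length width
      private static final long OFFSET_MASK = (1L << OFFSET_BITS) - 1;
      private static final long LENGTH_MASK = (1L << LENGTH_BITS) - 1;
      private static final long SINGLE_FLAG = 1L << (OFFSET_BITS + LENGTH_BITS);

      // Assumes the partial hash code already occupies bits above the flag bit and
      // that keyLength fits in LENGTH_BITS (the real code escapes big keys by
      // writing a VInt length into the byte store instead).
      public static long pack(long partialHash, long absoluteOffset, int keyLength) {
        long refWord = partialHash;
        refWord |= (absoluteOffset & OFFSET_MASK);
        refWord |= (((long) keyLength) & LENGTH_MASK) << OFFSET_BITS;
        refWord |= SINGLE_FLAG;  // non-zero by construction
        return refWord;
      }

      public static long absoluteOffset(long refWord) {
        return refWord & OFFSET_MASK;
      }

      public static int smallKeyLength(long refWord) {
        return (int) ((refWord >>> OFFSET_BITS) & LENGTH_MASK);
      }

      public static boolean isSingle(long refWord) {
        return (refWord & SINGLE_FLAG) != 0;
      }
    }

A probe first compares the partial hash bits of the slot's reference word; only on a match does it follow the offset into the byte store to verify the full key bytes.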
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 51b186c..280593c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -97,6 +97,56 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyDecimal64MaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyDecimal64MinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyDecimal64SumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyLongMaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyLongMinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyLongSumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyDecimal64MaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyDecimal64MinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyDecimal64SumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyLongMaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyLongMinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyLongSumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyDecimal64MaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyDecimal64MinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyDecimal64SumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyCountStarOperator; +import 
org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyLongMaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyLongMinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyLongSumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyDecimal64MaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyDecimal64MinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyDecimal64SumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyLongMaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyLongMinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyLongSumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyDecimal64MaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyDecimal64MinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyDecimal64SumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyLongMaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyLongMinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyLongSumColumnOperator; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -134,9 +184,15 @@ import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc; import org.apache.hadoop.hive.ql.plan.VectorPTFInfo; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType; +import 
org.apache.hadoop.hive.ql.plan.VectorGroupByInfo;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate.CountAggregateKind;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.WordAggregate.WordAggregateKind;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.WordAggregate;
 import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
 import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc;
@@ -312,6 +368,12 @@ private EnabledOverride vectorizationEnabledOverride;
   boolean isTestForcedVectorizationEnable;
+  boolean isVectorizationGroupByNativeEnabled;
+  private EnabledOverride vectorizationGroupByNativeEnabledOverride;
+  boolean isTestForcedVectorizationGroupByNativeEnable;
+  boolean weCanAttemptGroupByNativeVectorization;
+  int testGroupByMaxMemoryAvailable;
+
   private boolean useVectorizedInputFileFormat;
   private boolean useVectorDeserialize;
   private boolean useRowDeserialize;
@@ -2428,6 +2490,44 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE
     return physicalContext;
   }
+
+    // Native Vector GROUP BY.
+    isVectorizationGroupByNativeEnabled =
+        HiveConf.getBoolVar(hiveConf,
+            HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED);
+
+    final String testVectorizationGroupByNativeOverrideString =
+        HiveConf.getVar(hiveConf,
+            HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE);
+    vectorizationGroupByNativeEnabledOverride =
+        EnabledOverride.nameMap.get(testVectorizationGroupByNativeOverrideString);
+
+    isTestForcedVectorizationGroupByNativeEnable = false;
+    switch (vectorizationGroupByNativeEnabledOverride) {
+    case NONE:
+      weCanAttemptGroupByNativeVectorization = isVectorizationGroupByNativeEnabled;
+      break;
+    case DISABLE:
+      weCanAttemptGroupByNativeVectorization = false;
+      break;
+    case ENABLE:
+      weCanAttemptGroupByNativeVectorization = true;
+      isTestForcedVectorizationGroupByNativeEnable = !isVectorizationGroupByNativeEnabled;
+
+      // Different parts of the code rely on this being set...
+      HiveConf.setBoolVar(hiveConf,
+          HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED, true);
+      isVectorizationGroupByNativeEnabled = true;
+      break;
+    default:
+      throw new RuntimeException("Unexpected vectorization group by native enabled override " +
+          vectorizationGroupByNativeEnabledOverride);
+    }
+
+    testGroupByMaxMemoryAvailable =
+        HiveConf.getIntVar(hiveConf,
+            HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE);
+
+    // Input Format control.
     useVectorizedInputFileFormat =
         HiveConf.getBoolVar(hiveConf,
             HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT);
@@ -2649,7 +2749,10 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce,
       setOperatorIssue("DISTINCT not supported");
       return false;
     }
-    boolean ret = validateExprNodeDescNoComplex(desc.getKeys(), "Key");
+
+    // Allow Complex Type key expressions here because we may specialize.
+ // Later we will verify again. + boolean ret = validateExprNodeDesc(desc.getKeys(), "Key"); if (!ret) { return false; } @@ -3942,6 +4045,603 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi return result; } + public static Operator specializeGroupByOperator( + Operator op, VectorizationContext vContext, + GroupByDesc desc, VectorGroupByDesc vectorDesc) + throws HiveException { + + VectorGroupByInfo vectorGroupByInfo = vectorDesc.getVectorGroupByInfo(); + + Operator vectorOp = null; + Class> opClass = null; + + VectorGroupByInfo.HashTableKeyType hashTableKeyType = + vectorGroupByInfo.getHashTableKeyType(); + + AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + switch (aggregationVariation) { + case HASH_DUPLICATE_REDUCTION: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeyDuplicateReductionOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyDuplicateReductionOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeyDuplicateReductionOperator.class; + break; + case SINGLE_KEY: + opClass = VectorGroupByHashSingleKeyDuplicateReductionOperator.class; + break; + case MULTI_KEY: + opClass = VectorGroupByHashMultiKeyDuplicateReductionOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + + case HASH_COUNT: + { + CountAggregate countAggregate = vectorGroupByInfo.getCountAggregation(); + CountAggregateKind countAggregateKind = countAggregate.getCountAggregationKind(); + + switch (countAggregateKind) { + case COUNT_STAR: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeyCountStarOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyCountStarOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeyCountStarOperator.class; + break; + case SINGLE_KEY: + opClass = VectorGroupByHashSingleKeyCountStarOperator.class; + break; + case MULTI_KEY: + opClass = VectorGroupByHashMultiKeyCountStarOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + case COUNT_KEY: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeyCountKeyOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyCountKeyOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeyCountKeyOperator.class; + break; + case SINGLE_KEY: + opClass = VectorGroupByHashSingleKeyCountKeyOperator.class; + break; + case MULTI_KEY: + opClass = VectorGroupByHashMultiKeyCountKeyOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + case COUNT_COLUMN: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeyCountColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyCountColumnOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeyCountColumnOperator.class; + break; + case SINGLE_KEY: + opClass = VectorGroupByHashSingleKeyCountColumnOperator.class; + break; + case MULTI_KEY: + opClass = VectorGroupByHashMultiKeyCountColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + default: + throw new RuntimeException( + "Unexpected count aggregation kind " + countAggregateKind); + } + } + 
break; + + case HASH_WORD: + { + WordAggregate wordAggregate = vectorGroupByInfo.getWordAggregation(); + WordAggregateKind wordAggregateKind = wordAggregate.getWordAggregateKind(); + ColumnVector.Type inputColVectorType = wordAggregate.getInputColVectorType(); + switch (hashTableKeyType) { + case LONG: + switch (wordAggregateKind) { + case MAX: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashLongKeyLongMaxColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashLongKeyDecimal64MaxColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case MIN: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashLongKeyLongMinColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashLongKeyDecimal64MinColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case SUM: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashLongKeyLongSumColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashLongKeyDecimal64SumColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + default: + throw new RuntimeException( + "Unexpected word aggregation kind " + wordAggregateKind); + } + break; + case DECIMAL_64: + switch (wordAggregateKind) { + case MAX: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashDecimal64KeyLongMaxColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyDecimal64MaxColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case MIN: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashDecimal64KeyLongMinColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyDecimal64MinColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case SUM: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashDecimal64KeyLongSumColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyDecimal64SumColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + default: + throw new RuntimeException( + "Unexpected word aggregation kind " + wordAggregateKind); + } + break; + case STRING: + switch (wordAggregateKind) { + case MAX: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashStringKeyLongMaxColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashStringKeyDecimal64MaxColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case MIN: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashStringKeyLongMinColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashStringKeyDecimal64MinColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case SUM: + switch (inputColVectorType) { + case LONG: + opClass = 
VectorGroupByHashStringKeyLongSumColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashStringKeyDecimal64SumColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + default: + throw new RuntimeException( + "Unexpected word aggregation kind " + wordAggregateKind); + } + break; + case SINGLE_KEY: + switch (wordAggregateKind) { + case MAX: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashSingleKeyLongMaxColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashSingleKeyDecimal64MaxColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case MIN: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashSingleKeyLongMinColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashSingleKeyDecimal64MinColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case SUM: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashSingleKeyLongSumColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashSingleKeyDecimal64SumColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + default: + throw new RuntimeException( + "Unexpected word aggregation kind " + wordAggregateKind); + } + break; + case MULTI_KEY: + switch (wordAggregateKind) { + case MAX: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashMultiKeyLongMaxColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashMultiKeyDecimal64MaxColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case MIN: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashMultiKeyLongMinColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashMultiKeyDecimal64MinColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case SUM: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashMultiKeyLongSumColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashMultiKeyDecimal64SumColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + default: + throw new RuntimeException( + "Unexpected word aggregation kind " + wordAggregateKind); + } + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + } + break; + + default: + throw new RuntimeException("Unexpected aggregation variation " + aggregationVariation); + } + + vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + vectorDesc.setIsNative(true); + + vectorOp = OperatorFactory.getVectorOperator( + opClass, op.getCompilationOpContext(), desc, vContext, vectorDesc); + LOG.info("Vectorizer vectorizeOperator group by class " + vectorOp.getClass().getSimpleName()); + + return vectorOp; + } + + private ImmutablePair checkSupportedWordAggregate( + VectorAggregationDesc vecAggrDesc) { + + final String aggregationName = vecAggrDesc.getAggregationName(); + + final 
WordAggregateKind wordAggregateKind; + switch (aggregationName) { + case "max": + wordAggregateKind = WordAggregateKind.MAX; + break; + case "min": + wordAggregateKind = WordAggregateKind.MIN; + break; + case "sum": + wordAggregateKind = WordAggregateKind.SUM; + break; + default: + return new ImmutablePair(null, aggregationName + " not implemented"); + } + + ColumnVector.Type inputColVectorType = vecAggrDesc.getInputColVectorType(); + if (inputColVectorType != ColumnVector.Type.LONG && + inputColVectorType != ColumnVector.Type.DECIMAL_64) { + return new ImmutablePair( + null, "input column vector type " + inputColVectorType + " for " + + aggregationName + " not implemented"); + } + + ColumnVector.Type outputColVectorType = vecAggrDesc.getOutputColVectorType(); + if (outputColVectorType != ColumnVector.Type.LONG && + outputColVectorType != ColumnVector.Type.DECIMAL_64) { + return new ImmutablePair( + null, "output column vector type " + outputColVectorType + " for " + + aggregationName + " not implemented"); + } + + return + new ImmutablePair( + new WordAggregate( + vecAggrDesc.getInputExpression().getOutputColumnNum(), + wordAggregateKind, + inputColVectorType, + vecAggrDesc.getOutputTypeInfo()), null); + } + + private boolean canSpecializeGroupBy(GroupByDesc desc, VectorGroupByDesc vectorDesc, + boolean isTezOrSpark, VectorizationContext vContext) throws HiveException { + + String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); + + VectorGroupByInfo vectorGroupByInfo = new VectorGroupByInfo(); + + List vectorizationIssueList = new ArrayList(); + + List keyDescs = desc.getKeys(); + final boolean isEmptyKey = keyDescs.isEmpty(); + final int outputKeyLength = keyDescs.size(); + + GroupByDesc.Mode groupByMode = desc.getMode(); + ProcessingMode processingMode = vectorDesc.getProcessingMode(); + + VectorExpression[] vecKeyExprs = vectorDesc.getKeyExpressions(); + final int vecKeyExprSize = vecKeyExprs.length; + boolean isSingleColumnKey = (vecKeyExprSize == 1); + + VectorAggregationDesc[] vecAggrDescs = vectorDesc.getVecAggrDescs(); + final int vecAggrDescSize = (vecAggrDescs == null ? 0 : vecAggrDescs.length); + + List aggrDescList = desc.getAggregators(); + + boolean isHash = (groupByMode == GroupByDesc.Mode.HASH); + final AggregationVariation aggregationVariation; + + CountAggregate countAggregate = null; + WordAggregate wordAggregate = null; + + if (!isHash) { + + // FUTURE: For now, we only do specialized implementations for HASH mode. + + aggregationVariation = AggregationVariation.NONE; + + } else if (vecAggrDescSize == 0) { + + // No aggregations just means the key is being grouped. We are getting rid of duplicate keys. + + aggregationVariation = AggregationVariation.HASH_DUPLICATE_REDUCTION; + + } else if (vecAggrDescSize == 1) { + + // Single COUNT, or single {MAX|MIN|SUM} word-sized aggregation on a supported data type? + + if (aggrDescList.get(0).getGenericUDAFName().equalsIgnoreCase("count")) { + + // Single COUNT aggregation specialization. Store key and count in hash table without a + // hash element. 
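+          // The three COUNT shapes that follow need different bookkeeping:
+          //   COUNT(*)   - bump the per-key count for every row;
+          //   COUNT(key) - the argument is the grouping key itself, so only rows with a
+          //                non-NULL key count and no extra column has to be read;
+          //   COUNT(col) - the non-key input column number must be remembered so its
+          //                null mask can be consulted row by row.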
+ + AggregationDesc countAggrDesc = aggrDescList.get(0); + List countParamList = countAggrDesc.getParameters(); + final int countParamSize = countParamList.size(); + if (countParamSize == 0) { + + // COUNT(*) + + aggregationVariation = AggregationVariation.HASH_COUNT; + countAggregate = + new CountAggregate(CountAggregateKind.COUNT_STAR); + + } else if (countParamSize == 1) { + + aggregationVariation = AggregationVariation.HASH_COUNT; + + VectorAggregationDesc countVecAggrDesc = vecAggrDescs[0]; + + final int inputColumnNum = countVecAggrDesc.getInputExpression().getOutputColumnNum(); + + boolean isKey = false; + for (VectorExpression vecKeyExpr : vecKeyExprs) { + if (vecKeyExpr.getOutputColumnNum() == inputColumnNum) { + isKey = true; + break; + } + } + if (isKey) { + countAggregate = + new CountAggregate(CountAggregateKind.COUNT_KEY); + } else { + countAggregate = + new CountAggregate(CountAggregateKind.COUNT_COLUMN, inputColumnNum); + } + } else { + + aggregationVariation = AggregationVariation.NONE; + + vectorizationIssueList.add( + "Cannot specialize aggregation function " + countAggrDesc.getGenericUDAFName() + + " that has more than 1 input parameter"); + } + } else { + + // Single {MAX|MIN|SUM} on a supported {LONG|DECIMAL_64} data type? + ImmutablePair pair = + checkSupportedWordAggregate(vecAggrDescs[0]); + if (pair.left != null) { + + aggregationVariation = AggregationVariation.HASH_WORD; + wordAggregate = pair.left; + } else { + + aggregationVariation = AggregationVariation.NONE; + + vectorizationIssueList.add(pair.right); + } + } + } else { + + // FUTURE: Perhaps more aggregation variations will be supported... + aggregationVariation = AggregationVariation.NONE; + } + + // TEMPORARY: Restriction + + final VectorGroupByInfo.HashTableKeyType hashTableKeyType; + if (isSingleColumnKey) { + ColumnVector.Type colVectorType = vecKeyExprs[0].getOutputColumnVectorType(); + switch (colVectorType) { + case LONG: + + // Integer family, date, interval year month. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.LONG; + break; + + case DECIMAL_64: + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.DECIMAL_64; + break; + + case BYTES: + + // String family. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.STRING; + break; + + default: + + // All other data types get serialized. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.SINGLE_KEY; + break; + } + } else { + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.MULTI_KEY; + } + + vectorGroupByInfo.setIsVectorizationGroupByNativeEnabled( + weCanAttemptGroupByNativeVectorization); + vectorGroupByInfo.setEngine(engine); + + vectorGroupByInfo.setVectorizationIssueList(vectorizationIssueList); + + vectorGroupByInfo.setAggregationVariation(aggregationVariation); + + vectorGroupByInfo.setCountAggregate(countAggregate); + vectorGroupByInfo.setWordAggregate(wordAggregate); + + vectorGroupByInfo.setHashTableKeyType(hashTableKeyType); + + vectorGroupByInfo.setTestGroupByMaxMemoryAvailable(testGroupByMaxMemoryAvailable); + + // So EXPLAIN VECTORIZATION can show native conditions, etc. 
+ vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + if (!weCanAttemptGroupByNativeVectorization || + !isTezOrSpark || + (aggregationVariation == AggregationVariation.NONE) || + groupByMode != GroupByDesc.Mode.HASH || + desc.isGroupingSetsPresent() || + vectorizationIssueList.size() > 0) { + return false; + } + + return true; + } + private Operator specializeReduceSinkOperator( Operator op, VectorizationContext vContext, ReduceSinkDesc desc, VectorReduceSinkDesc vectorDesc) throws HiveException { @@ -4534,7 +5234,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { } // No support for DECIMAL_64 input. We must convert. - inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression); + inputExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(inputExpression, vContext); inputColVectorType = ColumnVector.Type.DECIMAL; // Fall through... @@ -4550,7 +5251,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { // we have to make sure same decimal type should be used during bloom filter creation // and bloom filter probing if (aggregationName.equals("bloom_filter")) { - inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression); + inputExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(inputExpression, vContext); inputColVectorType = ColumnVector.Type.DECIMAL; } final VectorAggregationDesc vecAggrDesc = @@ -4562,7 +5264,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { } // No support for DECIMAL_64 input. We must convert. - inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression); + inputExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(inputExpression, vContext); inputColVectorType = ColumnVector.Type.DECIMAL; // Fall through... @@ -4600,16 +5303,30 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { Operator groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { - ImmutablePair,String> pair = + String issue = + doVectorizeGroupByOperatorPreparation( + groupByOp, vContext, vectorGroupByDesc); + Preconditions.checkState(issue == null); + return doVectorizeGroupByOperator( groupByOp, vContext, vectorGroupByDesc); - return pair.left; + } + + private static Operator doVectorizeGroupByOperator( + Operator groupByOp, VectorizationContext vContext, + VectorGroupByDesc vectorGroupByDesc) + throws HiveException { + Operator vectorOp = + OperatorFactory.getVectorOperator( + groupByOp.getCompilationOpContext(), (GroupByDesc) groupByOp.getConf(), + vContext, vectorGroupByDesc); + return vectorOp; } /* * NOTE: The VectorGroupByDesc has already been allocated and will be updated here. */ - private static ImmutablePair,String> doVectorizeGroupByOperator( + public static String doVectorizeGroupByOperatorPreparation( Operator groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { @@ -4618,9 +5335,10 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { List keysDesc = groupByDesc.getKeys(); - // For now, we don't support group by on DECIMAL_64 keys. + // Allow DECIMAL_64 key expressions in preparation because we may specialize. + // Later we will verify again. 
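+    // (DECIMAL_64 represents short-precision decimals as scaled longs in a
+    // LongColumnVector, so a specialized hash table can hash and compare the raw
+    // long directly; only the generic fallback path needs the up-conversion back
+    // to a full HiveDecimal, and that is applied later, just before falling back.)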
VectorExpression[] vecKeyExpressions = - vContext.getVectorExpressionsUpConvertDecimal64(keysDesc); + vContext.getVectorExpressions(keysDesc); ArrayList aggrDesc = groupByDesc.getAggregators(); final int size = aggrDesc.size(); @@ -4631,7 +5349,7 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { ImmutablePair pair = getVectorAggregationDesc(aggDesc, vContext); if (pair.left == null) { - return new ImmutablePair, String>(null, pair.right); + return pair.right; } vecAggrDescs[i] = pair.left; @@ -4642,11 +5360,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { vectorGroupByDesc.setKeyExpressions(vecKeyExpressions); vectorGroupByDesc.setVecAggrDescs(vecAggrDescs); vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns); - Operator vectorOp = - OperatorFactory.getVectorOperator( - groupByOp.getCompilationOpContext(), groupByDesc, - vContext, vectorGroupByDesc); - return new ImmutablePair, String>(vectorOp, null); + + return null; // No issue. } public static Operator vectorizeSelectOperator( @@ -4723,7 +5438,8 @@ private static VectorExpression fixDecimalDataTypePhysicalVariations(final Vecto oldExpression = children[i]; // we found at least one children with mismatch if (oldExpression.getOutputDataTypePhysicalVariation() == DataTypePhysicalVariation.DECIMAL_64) { - newExpression = vContext.wrapWithDecimal64ToDecimalConversion(oldExpression); + newExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(oldExpression, vContext); children[i] = newExpression; inputArgsChanged = true; dataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE; @@ -4753,8 +5469,9 @@ private static VectorExpression fixDecimalDataTypePhysicalVariations(final Vecto arguments[arguments.length - 1] = parent.getOutputColumnNum(); } // re-instantiate the parent expression with new arguments - VectorExpression newParent = vContext.instantiateExpression(parent.getClass(), parent.getOutputTypeInfo(), - parent.getOutputDataTypePhysicalVariation(), arguments); + VectorExpression newParent = + VectorizationContext.instantiateExpression(parent.getClass(), parent.getOutputTypeInfo(), + parent.getOutputDataTypePhysicalVariation(), vContext, arguments); newParent.setOutputTypeInfo(parent.getOutputTypeInfo()); newParent.setOutputDataTypePhysicalVariation(parent.getOutputDataTypePhysicalVariation()); newParent.setInputTypeInfos(parent.getInputTypeInfos()); @@ -5298,23 +6015,51 @@ private static VectorPTFInfo createVectorPTFInfo(Operator,String> pair = - doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); - if (pair.left == null) { - setOperatorIssue(pair.right); + String issue = + doVectorizeGroupByOperatorPreparation(op, vContext, vectorGroupByDesc); + if (issue != null) { + setOperatorIssue(issue); throw new VectorizerCannotVectorizeException(); } - vectorOp = pair.left; - isNative = false; + + GroupByDesc groupByDesc = (GroupByDesc) op.getConf(); + boolean specialize = + canSpecializeGroupBy(groupByDesc, vectorGroupByDesc, isTezOrSpark, vContext); + + if (!specialize) { + + // Re-validate -- this time do not allow Complex Type keys. + boolean isNoComplexTypeKey = + validateExprNodeDescNoComplex(groupByDesc.getKeys(), "Key"); + if (!isNoComplexTypeKey) { + throw new VectorizerCannotVectorizeException(); + } + + // Regular VectorGroupByOperator does not support DECIMAL_64 keys. 
+ VectorizationContext.upConvertDecimal64( + vectorGroupByDesc.getKeyExpressions(), vContext); + + vectorOp = + doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); + isNative = false; + + } else { + + vectorOp = + specializeGroupByOperator(op, vContext, groupByDesc, vectorGroupByDesc); + isNative = true; + } if (vectorTaskColumnInfo != null) { VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions(); if (usesVectorUDFAdaptor(vecKeyExpressions)) { vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); - for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { - if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { - vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + if (vecAggrDescs != null) { + for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { + if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 31237c8..f5c86d1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -20,8 +20,10 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; @@ -31,7 +33,10 @@ import org.apache.hive.common.util.AnnotationUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; - +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate.CountAggregateKind; /** * GroupByDesc. @@ -324,26 +329,38 @@ public Object clone() { this.groupingSetPosition, this.isDistinct); } + // Use LinkedHashSet to give predictable display order. + private static final Set vectorizableGroupByNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { private final GroupByDesc groupByDesc; private final VectorGroupByDesc vectorGroupByDesc; + private final VectorGroupByInfo vectorGroupByInfo; + + private VectorizationCondition[] nativeConditions; public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorGroupByDesc vectorGroupByDesc) { - // Native vectorization not supported. 
- super(vectorGroupByDesc, false); + super(vectorGroupByDesc, vectorGroupByDesc.isNative()); this.groupByDesc = groupByDesc; this.vectorGroupByDesc = vectorGroupByDesc; + vectorGroupByInfo = vectorGroupByDesc.getVectorGroupByInfo(); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getKeysExpression() { return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions()); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getAggregators() { + if (isNative) { + return null; + } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); List vecAggrList = new ArrayList(vecAggrDescs.length); for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { @@ -352,17 +369,20 @@ public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, return vecAggrList; } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProcessingMode() { return vectorGroupByDesc.getProcessingMode().name(); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getGroupByMode() { return groupByDesc.getMode().name(); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getVectorOutputConditionsNotMet() { List results = new ArrayList(); @@ -379,13 +399,110 @@ public String getGroupByMode() { return results; } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProjectedOutputColumnNums() { return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns()); } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorGroupByInfo.getIsVectorizationGroupByNativeEnabled(); + + String engine = vectorGroupByInfo.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableGroupByNativeEngines; + boolean engineInSupported = vectorizableGroupByNativeEngines.contains(engine); + + final List vectorizationIssueList = vectorGroupByInfo.getVectorizationIssueList(); + + List conditionList = new ArrayList(); + conditionList.add( + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED.varname)); + conditionList.add( + new 
VectorizationCondition( + engineInSupported, + engineInSupportedCondName)); + AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + conditionList.add( + new VectorizationCondition( + (aggregationVariation == AggregationVariation.HASH_COUNT || + aggregationVariation == AggregationVariation.HASH_DUPLICATE_REDUCTION || + aggregationVariation == AggregationVariation.HASH_WORD), + "Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate")); + conditionList.add( + new VectorizationCondition( + (vectorGroupByDesc.getProcessingMode() == ProcessingMode.HASH), + "Group By Mode HASH")); + conditionList.add( + new VectorizationCondition( + !groupByDesc.isGroupingSetsPresent(), + "No Grouping Sets")); + if (vectorizationIssueList.size() != 0) { + conditionList.add( + new VectorizationCondition( + true, + "Has issues \"" + + vectorizationIssueList.toString() + "\"")); + } + + VectorizationCondition[] conditions = + conditionList.toArray(new VectorizationCondition[0]); + + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences. + if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) { + return null; + } + + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences. 
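+    // (Reduce-side modes such as MERGEPARTIAL are never specialized at this point,
+    // so emitting the condition list for them would only churn golden Q file output
+    // without adding information.)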
+    if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) {
+      return null;
+    }
+
+    if (nativeConditions == null) {
+      nativeConditions = createNativeConditions();
+    }
+    return VectorizationCondition.getConditionsNotMet(nativeConditions);
+  }
+
+  @Explain(vectorization = Vectorization.DETAIL, displayName = "countAggregation",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public String getCountAggregation() {
+    if (!isNative) {
+      return null;
+    }
+    final CountAggregate countAggregate = vectorGroupByInfo.getCountAggregation();
+    if (countAggregate == null) {
+      // Only the HASH_COUNT variation carries a count aggregate.
+      return null;
+    }
+    final CountAggregateKind countAggregateKind = countAggregate.getCountAggregationKind();
+    if (countAggregateKind == CountAggregateKind.NONE) {
+      return null;
+    }
+    return countAggregateKind.name();
+  }
 }

-  @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
   public GroupByOperatorExplainVectorization getGroupByVectorization() {
     VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) getVectorDesc();
     if (vectorGroupByDesc == null) {
@@ -404,11 +521,14 @@ public static String getComplexTypeEnabledCondition(
   public static String getComplexTypeWithGroupByEnabledCondition(
       boolean isVectorizationComplexTypesEnabled,
       boolean isVectorizationGroupByComplexTypesEnabled) {
-    final boolean enabled = (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled);
+    final boolean enabled =
+        (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled);
     return "(" +
-        HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationComplexTypesEnabled +
+        HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " +
+        isVectorizationComplexTypesEnabled +
         " AND " +
-        HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationGroupByComplexTypesEnabled +
+        HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " +
+        isVectorizationGroupByComplexTypesEnabled +
         ") IS " + enabled;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
index caf0c67..b7e60f7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
@@ -67,8 +67,12 @@
   private boolean isVectorizationComplexTypesEnabled;
   private boolean isVectorizationGroupByComplexTypesEnabled;
+  private boolean isNative;
+  private VectorGroupByInfo vectorGroupByInfo;
+
   public VectorGroupByDesc() {
-    this.processingMode = ProcessingMode.NONE;
+    processingMode = ProcessingMode.NONE;
+    isNative = false;
   }
   public void setProcessingMode(ProcessingMode processingMode) {
@@ -78,6 +82,14 @@ public ProcessingMode getProcessingMode() {
     return processingMode;
   }
+  public void setIsNative(boolean isNative) {
+    this.isNative = isNative;
+  }
+
+  public boolean isNative() {
+    return isNative;
+  }
+
   public void setKeyExpressions(VectorExpression[] keyExpressions) {
     this.keyExpressions = keyExpressions;
   }
@@ -118,6 +130,14 @@ public boolean getIsVectorizationGroupByComplexTypesEnabled() {
     return isVectorizationGroupByComplexTypesEnabled;
   }
+  public void setVectorGroupByInfo(VectorGroupByInfo vectorGroupByInfo) {
+    this.vectorGroupByInfo = vectorGroupByInfo;
+  }
+
+  public VectorGroupByInfo getVectorGroupByInfo() {
+    return vectorGroupByInfo;
+  }
+
   /**
   * Which ProcessingMode for VectorGroupByOperator?
   *
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java
new file mode 100644
index 0000000..1dffbfc
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * VectorGroupByInfo.
+ *
+ * A convenience data structure that has the information needed to vectorize group by.
+ *
+ * It is created by the Vectorizer when it is determining whether it can specialize, so the
+ * information doesn't have to be recreated again and again by the VectorGroupByOperator's
+ * constructors and later during execution.
+ */
+public class VectorGroupByInfo {
+
+  private static final long serialVersionUID = 1L;
+
+  public enum HashTableKeyType {
+    NONE,
+    LONG,
+    DECIMAL_64,
+    STRING,
+    SINGLE_KEY,
+    MULTI_KEY
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  public enum AggregationVariation {
+    NONE,
+    HASH_DUPLICATE_REDUCTION,
+    HASH_WORD,
+    HASH_COUNT
+  }
+
+  public static class CountAggregate {
+
+    public enum CountAggregateKind {
+      NONE,
+      COUNT_STAR,
+      COUNT_KEY,
+      COUNT_COLUMN
+    }
+
+    private final CountAggregateKind countAggregateKind;
+    private final int countColumnNum;
+
+    public CountAggregate(CountAggregateKind countAggregateKind) {
+      this.countAggregateKind = countAggregateKind;
+      countColumnNum = -1;
+    }
+
+    public CountAggregate(CountAggregateKind countAggregateKind,
+        int countColumnNum) {
+      this.countAggregateKind = countAggregateKind;
+      this.countColumnNum = countColumnNum;
+    }
+
+    public CountAggregateKind getCountAggregationKind() {
+      return countAggregateKind;
+    }
+
+    public int getCountColumnNum() {
+      return countColumnNum;
+    }
+  }
+
+  public static class WordAggregate {
+
+    public enum WordAggregateKind {
+      NONE,
+      MAX,
+      MIN,
+      SUM
+    }
+
+    private final int wordAggregateColumnNum;
+    private final WordAggregateKind wordAggregateKind;
+    private final ColumnVector.Type inputColVectorType;
+    private final TypeInfo outputTypeInfo;
+
+    public WordAggregate(int wordAggregateColumnNum, WordAggregateKind wordAggregateKind,
+        ColumnVector.Type inputColVectorType, TypeInfo outputTypeInfo) {
+      this.wordAggregateColumnNum = wordAggregateColumnNum;
+      this.wordAggregateKind = wordAggregateKind;
+      this.inputColVectorType = inputColVectorType;
+      this.outputTypeInfo = outputTypeInfo;
+    }
+
+    public int getWordAggregateColumnNum() {
+      return wordAggregateColumnNum;
+    }
+
+    public
WordAggregateKind getWordAggregateKind() { + return wordAggregateKind; + } + + public ColumnVector.Type getInputColVectorType() { + return inputColVectorType; + } + + public TypeInfo getOutputTypeInfo() { + return outputTypeInfo; + } + } + + //--------------------------------------------------------------------------- + + private boolean isVectorizationGroupByNativeEnabled; + private String engine; + + private List vectorizationIssueList; + + private AggregationVariation aggregationVariation; + + private CountAggregate countAggregate; + private WordAggregate wordAggregate; + + private HashTableKeyType hashTableKeyType; + + private int testGroupByMaxMemoryAvailable; + + public VectorGroupByInfo() { + isVectorizationGroupByNativeEnabled = false; + + vectorizationIssueList = null; + + hashTableKeyType = HashTableKeyType.NONE; + + testGroupByMaxMemoryAvailable = -1; + } + + public boolean getIsVectorizationGroupByNativeEnabled() { + return isVectorizationGroupByNativeEnabled; + } + + public void setIsVectorizationGroupByNativeEnabled(boolean isVectorizationGroupByNativeEnabled) { + this.isVectorizationGroupByNativeEnabled = isVectorizationGroupByNativeEnabled; + } + + public String getEngine() { + return engine; + } + + public void setEngine(String engine) { + this.engine = engine; + } + + public List getVectorizationIssueList() { + return vectorizationIssueList; + } + + public void setVectorizationIssueList(List vectorizationIssueList) { + this.vectorizationIssueList = vectorizationIssueList; + } + + public void setAggregationVariation(AggregationVariation aggregationVariation) { + this.aggregationVariation = aggregationVariation; + } + + public AggregationVariation getAggregationVariation() { + return aggregationVariation; + } + + public void setCountAggregate(CountAggregate countAggregate) { + this.countAggregate = countAggregate; + } + + public CountAggregate getCountAggregation() { + return countAggregate; + } + + public void setWordAggregate(WordAggregate wordAggregate) { + this.wordAggregate = wordAggregate; + } + + public WordAggregate getWordAggregation() { + return wordAggregate; + } + + public HashTableKeyType getHashTableKeyType() { + return hashTableKeyType; + } + + public void setHashTableKeyType(HashTableKeyType hashTableKeyType) { + this.hashTableKeyType = hashTableKeyType; + } + + public int getTestGroupByMaxMemoryAvailable() { + return testGroupByMaxMemoryAvailable; + } + + public void setTestGroupByMaxMemoryAvailable(int testGroupByMaxMemoryAvailable) { + this.testGroupByMaxMemoryAvailable = testGroupByMaxMemoryAvailable; + } +} diff --git ql/src/test/queries/clientpositive/vector_groupby_multikey.q ql/src/test/queries/clientpositive/vector_groupby_multikey.q new file mode 100644 index 0000000..c91b026 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_multikey.q @@ -0,0 +1,151 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.groupby.native.enabled=true; +-- We want to create selectedInUse batches with WHERE expressions. 
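+-- (With PPD off, the predicates stay in a vectorized FilterOperator in front of the
+-- GROUP BY instead of being pushed to the ORC reader, so the GROUP BY input batches
+-- arrive with selectedInUse=true and a sparse selected array.)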
+SET hive.optimize.ppd=false;
+
+set hive.llap.io.enabled=true;
+set hive.llap.io.encode.enabled=true;
+
+-- SORT_QUERY_RESULTS
+
+
+
+CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt;
+CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt;
+
+-- Add a single NULL row that will come from ORC as isRepeated.
+insert into groupby_multi_1a values (NULL, NULL);
+
+-- And, add rows for a single non-NULL key that is already in the table and for one
+-- that isn't; these will come from ORC as isRepeated, too.
+insert into groupby_multi_1a values (date '2207-09-16', -13);
+insert into groupby_multi_1a values (date '2018-04-20', 18);
+
+CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt;
+CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt;
+
+insert into groupby_multi_1a_nonull values (date '2111-10-04', -81);
+insert into groupby_multi_1a_nonull values (date '2018-04-21', 19);
+
+
+
+-- *_multi_1a
+
+-- COUNT_KEY
+-- explain vectorization operator
+-- select key0, key1, count(key0, key1) from groupby_multi_1a group by key0, key1;
+-- select key0, key1, count(key0, key1) from groupby_multi_1a group by key0, key1;
+-- select key0, key1, count(key0, key1) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1;
+
+-- COUNT_STAR
+explain vectorization operator
+select key0, key1, count(*) from groupby_multi_1a group by key0, key1;
+select key0, key1, count(*) from groupby_multi_1a group by key0, key1;
+select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1;
+
+-- DUPLICATE_REDUCTION
+explain vectorization operator
+select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1;
+select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1;
+select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1;
+
+-- *_multi_1a_nonull
+
+-- COUNT_KEY
+-- select key0, key1, count(key0, key1) from groupby_multi_1a_nonull group by key0, key1;
+-- select key0, key1, count(key0, key1) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1;
+
+-- COUNT_STAR
+select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1;
+select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1;
+
+-- DUPLICATE_REDUCTION
+explain vectorization operator
+select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1;
+select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1;
+select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1;
+
+
+
+------------------------------------------------------------------------------------------
+
+CREATE TABLE over10k(t tinyint,
+           si smallint,
+           i int,
+           b bigint,
+           f float,
+           d double,
+           bo boolean,
+           s string,
+           ts timestamp,
+           `dec` decimal(4,2),
+           bin binary)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k;
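+
+-- over10k widens the multi-key type coverage beyond (date, tinyint): the
+-- (string, boolean), (timestamp, smallint), and (decimal, binary) key pairs below
+-- all take the serialized MULTI_KEY hash-table path.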
+-- MULTI-KEY: STRING, BOOLEAN +-- explain vectorization operator +-- select s, bo, count(s, bo) from over10k group by s, bo order by s, bo limit 10; +-- select s, bo, count(s, bo) from over10k group by s, bo order by s, bo limit 10; + +explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10; +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10; + +explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10; +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10; + +-- MULTI-KEY: TIMESTAMP, SMALLINT +-- explain vectorization operator +-- select ts, si, count(ts, si) from over10k group by ts, si order by ts, si limit 10; +-- select ts, si, count(ts, si) from over10k group by ts, si order by ts, si limit 10; + +explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10; +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10; + +explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10; +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10; + +-- MULTI-KEY: DECIMAL, BINARY +-- explain vectorization operator +-- select `dec`, bin, count(`dec`, bin) from over10k group by `dec`, bin order by `dec`, bin limit 10; +-- select `dec`, bin, count(`dec`, bin) from over10k group by `dec`, bin order by `dec`, bin limit 10; + +explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10; +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10; + +explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10; +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10; + + +set hive.test.vectorized.groupby.native.max.memory.available=1024; + +-- explain vectorization operator +-- select i, b, count(i, b) from over10k group by i, b order by i, b limit 10; +-- select i, b, count(i, b) from over10k group by i, b order by i, b limit 10; + +explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10; +select i, b, count(si) from over10k group by i, b order by i, b limit 10; + +explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10; +select i, b, count(*) from over10k group by i, b order by i, b limit 10; + +explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10; +select i, b from over10k group by i, b order by i, b limit 10; diff --git ql/src/test/queries/clientpositive/vector_groupby_singlekey.q ql/src/test/queries/clientpositive/vector_groupby_singlekey.q new file mode 100644 index 0000000..b68a4c6 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_singlekey.q @@ -0,0 +1,710 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.groupby.native.enabled=true; +-- We want to create selectedInUse batches with WHERE expressions.
+SET hive.optimize.ppd=false; + +set hive.llap.io.enabled=true; +set hive.llap.io.encode.enabled=true; + +-- SORT_QUERY_RESULTS + + + +CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt; +CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt; + +-- Add a single NULL row that will come from ORC as isRepeated. +insert into groupby_long_1a values (NULL); + +-- And, a single non-NULL key already in the table plus one that isn't, as rows that will +-- come from ORC as isRepeated, too. +insert into groupby_long_1a values (-5206670856103795573); +insert into groupby_long_1a values (800); + +CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt; +CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt; + +insert into groupby_long_1a_nonull values (-6187919478609154811); +insert into groupby_long_1a_nonull values (1000); + + + +CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt; +CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt; + +insert into groupby_long_1b values (NULL); + +insert into groupby_long_1b values (32030); +insert into groupby_long_1b values (800); + +CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt; +CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt; + +insert into groupby_long_1b_nonull values (31713); +insert into groupby_long_1b_nonull values (34); + + + +CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt; +CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt; + +insert into groupby_long_1c values (NULL, NULL); +insert into groupby_long_1c values (NULL, 'TKTKGVGFW'); +insert into groupby_long_1c values (NULL, 'NEW'); + +CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt; +CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt; + +insert into groupby_long_1c_nonull values (1928928239, NULL); +insert into groupby_long_1c_nonull values (9999, 'NEW'); + + + +-- *_long_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1a group by key; +select key, count(key) from groupby_long_1a group by key; +select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1a group by key; +select key, count(*) from groupby_long_1a group by key; +select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from
groupby_long_1a group by key order by key; +select key from groupby_long_1a group by key order by key; +select key from groupby_long_1a where key != -8460550397108077433 group by key order by key; + +-- *_long_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1a_nonull group by key; +select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1a_nonull group by key; +select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key; +select key from groupby_long_1a_nonull group by key order by key; +select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key; + +-- *_long_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1b group by key; +select key, count(key) from groupby_long_1b group by key; +select key, count(key) from groupby_long_1b where key != 32030 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1b group by key; +select key, count(*) from groupby_long_1b group by key; +select key, count(*) from groupby_long_1b where key != 32030 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1b group by key order by key; +select key from groupby_long_1b group by key order by key; +select key from groupby_long_1b where key != 32030 group by key order by key; + +-- *_long_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1b_nonull group by key; +select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1b_nonull group by key; +select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key; +select key from groupby_long_1b_nonull group by key order by key; +select key from groupby_long_1b_nonull where key != 32030 group by key order by key; + +-- *_long_1c + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1c group by key; +select key, count(key) from groupby_long_1c group by key; +select key, count(key) from groupby_long_1c where key != -1437463633 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1c group by key; +select key, count(*) from groupby_long_1c group by key; +select key, count(*) from groupby_long_1c where key != -1437463633 group by key; + +-- COUNT_COLUMN +explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key; +select key, count(b_string) from groupby_long_1c group by key; +select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1c group by key order by key; +select key from groupby_long_1c group by key order by key; +select key from groupby_long_1c where key != -1437463633 group by key order by key; + +-- *_long_1c_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1c_nonull group by key; +select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key; + +-- COUNT_STAR +select key, count(*) from
groupby_long_1c_nonull group by key; +select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key; + +-- COUNT_COLUMN +select key, count(b_string) from groupby_long_1c_nonull group by key; +select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key; +select key from groupby_long_1c_nonull group by key order by key; +select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key; + + +set hive.llap.io.enabled=false; +set hive.llap.io.encode.enabled=false; + +CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a; + +-- Add a single NULL row. +insert into groupby_decimal64_1a values (NULL); + +-- And, a single non-NULL key already in the table and one that isn't. +insert into groupby_decimal64_1a values (333.33); +insert into groupby_decimal64_1a values (800); + +CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull; + +insert into groupby_decimal64_1a_nonull values (-76.2); +insert into groupby_decimal64_1a_nonull values (100); + + +CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b; + +insert into groupby_decimal64_1b values (NULL, NULL); + +insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01); +insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800); + +CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull; + +insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02); +insert into groupby_decimal64_1b_nonull values ('1970-05-08 04:59:00.0',34); + + +-- *_decimal64_1a + +-- COUNT_KEY +select key, count(key) from groupby_decimal64_1a group by key; +select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_decimal64_1a group by key; +select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization detail +select key from groupby_decimal64_1a group by key order by key; +select key from groupby_decimal64_1a group by key order by key; +select key from groupby_decimal64_1a where key != -0.342 group by key order by key; + + +-- *_decimal64_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_decimal64_1a_nonull group by key; +select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_decimal64_1a_nonull group by key; +select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key; +select key from
groupby_decimal64_1a_nonull group by key order by key; +select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key; + + +-- *_decimal64_1b + +-- COUNT_KEY +explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key; +select key, count(key) from groupby_decimal64_1b group by key; +select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key; + +-- COUNT_STAR +explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key; +select key, count(*) from groupby_decimal64_1b group by key; +select key, count(*) from groupby_decimal64_1b where key != 11041.91 group by key; + +-- COUNT_COLUMN +explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key; +select key, count(c_timestamp) from groupby_decimal64_1b group by key; +select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization detail +select key from groupby_decimal64_1b group by key order by key; +select key from groupby_decimal64_1b group by key order by key; +select key from groupby_decimal64_1b where key != 11041.91 group by key order by key; + +-- *_decimal64_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_decimal64_1b_nonull group by key; +select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_decimal64_1b_nonull group by key; +select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key; + +-- COUNT_COLUMN +select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key; +select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key; +select key from groupby_decimal64_1b_nonull group by key order by key; +select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key; + +set hive.llap.io.enabled=true; +set hive.llap.io.encode.enabled=true; + + + +CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt; +CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt; + +-- Add a single NULL row that will come from ORC as isRepeated. +insert into groupby_string_1a values (NULL); + +-- And, a single non-NULL key already in the table plus one that isn't, as rows that will +-- come from ORC as isRepeated, too. +insert into groupby_string_1a values ('QNCYBDW'); +insert into groupby_string_1a values ('NOT'); + +CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt; +CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt; + +insert into groupby_string_1a_nonull values ('PXLD'); +insert into groupby_string_1a_nonull values ('AA'); + +-- Use same data as 1a.
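+-- Note (illustrative, not part of the original patch): key is char(4) below, so the longer +-- 1a values (e.g. 'QNCYBDW') are expected to be truncated to 4 characters on load, giving +-- the 1b tables a different key set than 1a; roughly: +-- +--   select cast('QNCYBDW' as char(4));   -- 'QNCY', assuming standard CHAR truncation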
+CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt; +CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt; + +insert into groupby_string_1b values (NULL); + +insert into groupby_string_1b values ('QNCYBDW'); +insert into groupby_string_1b values ('NOT'); + +CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt; +CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt; + +insert into groupby_string_1b_nonull values ('PXLD'); +insert into groupby_string_1b_nonull values ('AA'); + +CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt; +CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt; + +insert into groupby_string_1c values (NULL, NULL, NULL); +insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c values (NULL, '2018-04-11', NULL); + +insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL); +insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL); + +insert into groupby_string_1c values ('BB', NULL, NULL); +insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c values ('DD', '2018-04-14', NULL); + +CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt; +CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt; + +insert into groupby_string_1c_nonull values ('SDA', NULL, NULL); +insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL); + +insert into groupby_string_1c_nonull values ('EEE', NULL, NULL); +insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888'); +insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL); + +-- *_string_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1a group by key; +select key, count(key) from groupby_string_1a group by key; +select key, count(key) from groupby_string_1a where key != 'PXLD' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1a group by key; +select key, count(*) from groupby_string_1a group by key; +select key, count(*) from groupby_string_1a where key != 'PXLD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1a group by key order by key; +select key from groupby_string_1a group by key order by key; +select key from groupby_string_1a where key != 'PXLD' group by key order by key; + +-- *_string_1a_nonull + +--
COUNT_KEY +select key, count(key) from groupby_string_1a_nonull group by key; +select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1a_nonull group by key; +select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key; +select key from groupby_string_1a_nonull group by key order by key; +select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key; + +-- *_string_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1b group by key; +select key, count(key) from groupby_string_1b group by key; +select key, count(key) from groupby_string_1b where key != 'MXGD' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1b group by key; +select key, count(*) from groupby_string_1b group by key; +select key, count(*) from groupby_string_1b where key != 'MXGD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1b group by key order by key; +select key from groupby_string_1b group by key order by key; +select key from groupby_string_1b where key != 'MXGD' group by key order by key; + +-- *_string_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_string_1b_nonull group by key; +select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1b_nonull group by key; +select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key; +select key from groupby_string_1b_nonull group by key order by key; +select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key; + +-- *_string_1c + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1c group by key; +select key, count(key) from groupby_string_1c group by key; +select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1c group by key; +select key, count(*) from groupby_string_1c group by key; +select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_date +explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key; +select key, count(s_date) from groupby_string_1c group by key; +select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_timestamp +explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key; +select key, count(s_timestamp) from groupby_string_1c group by key; +select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1c group by key order by key; +select key from groupby_string_1c group by key order by key; +select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key; + +-- *_string_1c_nonull + +-- COUNT_KEY +select key, count(key) from groupby_string_1c_nonull group by key; +select key, count(key) 
from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1c_nonull group by key; +select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_date +select key, count(s_date) from groupby_string_1c_nonull group by key; +select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_timestamp +select key, count(s_timestamp) from groupby_string_1c_nonull group by key; +select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key; +select key from groupby_string_1c_nonull group by key order by key; +select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key; + + + +CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt; +CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt; + +CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt; +CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt; + + +CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt; +CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt; + +CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt; +CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt; + + +-- *_serialize_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key; +select key, count(key) from groupby_serialize_1a group by key; +select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key; +select key, count(*) from groupby_serialize_1a group by key; +select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1a group by key order by key; +select key from groupby_serialize_1a group by key order by key; +select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key; + +-- *_serialize_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_serialize_1a_nonull group by key; +select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_serialize_1a_nonull group 
by key; +select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key; +select key from groupby_serialize_1a_nonull group by key order by key; +select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key; + +-- *_serialize_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key; +select key, count(key) from groupby_serialize_1b group by key; +select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key; +select key, count(*) from groupby_serialize_1b group by key; +select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_smallint +explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key; +select key, count(c_smallint) from groupby_serialize_1b group by key; +select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_string +explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key; +select key, count(c_string) from groupby_serialize_1b group by key; +select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1b group by key order by key; +select key from groupby_serialize_1b group by key order by key; +select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key; + +-- *_serialize_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_serialize_1b_nonull group by key; +select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_serialize_1b_nonull group by key; +select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_smallint +select key, count(c_smallint) from groupby_serialize_1b_nonull group by key; +select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_string +select key, count(c_string) from groupby_serialize_1b_nonull group by key; +select key, count(c_string) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key; +select key from groupby_serialize_1b_nonull group by key order by key; +select key from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key order by key; + +------------------------------------------------------------------------------------------ + +CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k; + +-- STRING +explain
vectorization operator +select s, count(s) from over10k group by s order by s limit 10; +select s, count(s) from over10k group by s order by s limit 10; + +explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10; +select s, count(ts) from over10k group by s order by s limit 10; + +explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10; +select s, count(*) from over10k group by s order by s limit 10; + +-- SERIALIZE TIMESTAMP +explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10; +select ts, count(ts) from over10k group by ts order by ts limit 10; + +explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10; +select ts, count(d) from over10k group by ts order by ts limit 10; + +explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10; +select ts, count(*) from over10k group by ts order by ts limit 10; + +-- SERIALIZE DECIMAL +explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10; + +explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10; + +explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10; + + +set hive.test.vectorized.groupby.native.max.memory.available=1024; + +explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10; +select i, count(i) from over10k group by i order by i limit 10; + +explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10; +select i, count(b) from over10k group by i order by i limit 10; + +explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10; +select i, count(*) from over10k group by i order by i limit 10; + +explain vectorization operator +select i from over10k group by i order by i limit 10; +select i from over10k group by i order by i limit 10; diff --git ql/src/test/queries/clientpositive/vector_number_compare_projection.q ql/src/test/queries/clientpositive/vector_number_compare_projection.q index 7abae94..ba98dba 100644 --- ql/src/test/queries/clientpositive/vector_number_compare_projection.q +++ ql/src/test/queries/clientpositive/vector_number_compare_projection.q @@ -3,6 +3,7 @@ set hive.explain.user=false; SET hive.auto.convert.join=true; set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; +set hive.vectorized.execution.groupby.native.enabled=false; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/vectorization_0.q ql/src/test/queries/clientpositive/vectorization_0.q index 543029f..4d08133 100644 --- ql/src/test/queries/clientpositive/vectorization_0.q +++ ql/src/test/queries/clientpositive/vectorization_0.q @@ -5,6 +5,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.groupby.native.enabled=false; set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out 
ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 5bd3d90..fd0e1aa 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1762,6 +1762,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -1857,6 +1859,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -1871,6 +1875,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2613,6 +2619,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -2708,6 +2716,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2722,6 +2732,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_partitioned.q.out ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index 7e6e88b..64e10d3 100644 --- ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -2008,6 +2008,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:tinyint native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By 
Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: tinyint) @@ -2393,6 +2395,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index 37f8d36..91a2fae 100644 --- ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -79,6 +79,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -265,6 +267,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/mergejoin.q.out ql/src/test/results/clientpositive/llap/mergejoin.q.out index ea94b3b..dce1196 100644 --- ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -132,6 +132,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -2113,6 +2115,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -3223,6 +3227,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out 
ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out index b1d2b33..f0bd80b 100644 --- ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out @@ -66,11 +66,11 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY @@ -146,11 +146,11 @@ STAGE PLANS: PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -164,11 +164,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -182,11 +182,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -263,6 +263,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -348,11 +350,11 @@ STAGE PLANS: PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 
POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -379,20 +381,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -406,11 +408,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -424,11 +426,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -442,11 +444,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -473,20 +475,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select st1, 
st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -500,11 +502,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -518,11 +520,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -536,11 +538,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 diff --git ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out index 131a57a..92aa418 100644 --- ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out @@ -78,11 +78,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -158,11 +158,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: 
Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -239,6 +239,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -351,11 +353,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2144 2145 2142 2143 2140 2141 @@ -384,11 +386,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -464,11 +466,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -545,6 +547,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -657,11 +661,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 
POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2146 2147 2144 2145 2142 2143 @@ -690,11 +694,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -770,11 +774,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -851,6 +855,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -963,11 +969,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2148 2149 2146 2147 2144 2145 diff --git ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index e3f4cc4..2f189c7 100644 --- ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -88,11 +88,11 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain 
vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 @@ -171,12 +171,12 @@ PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -255,6 +255,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: string) @@ -368,12 +370,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -404,22 +406,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 
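On the recurring hunks that replace "PREHOOK: Output: hdfs://### HDFS PATH ###" with "#### A masked pattern was here ####": the staging output path differs from run to run, so the golden files record a stable placeholder instead of the literal HDFS URI. A sketch of the kind of line normalization a test harness can apply before diffing results — the pattern and class below are assumptions for illustration, not Hive's actual QTest masking code:

    import java.util.regex.Pattern;

    // Illustrative only: swaps nondeterministic HDFS output paths for a fixed
    // placeholder so golden-file comparisons stay stable across environments.
    final class OutputMasker {
      private static final Pattern HDFS_OUTPUT =
          Pattern.compile("(PRE|POST)HOOK: Output: hdfs://.*");

      static String mask(String line) {
        return HDFS_OUTPUT.matcher(line).matches()
            ? "#### A masked pattern was here ####"
            : line;
      }
    }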
{"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -434,12 +436,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -470,22 +472,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -500,12 +502,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 diff --git ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out index a02edbe..4ae4fba 100644 --- ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out +++ 
ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out @@ -66,11 +66,11 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY @@ -146,11 +146,11 @@ STAGE PLANS: PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -164,11 +164,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -182,11 +182,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -263,6 +263,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -348,11 +350,11 @@ STAGE PLANS: PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### 
+#### A masked pattern was here #### 501 501 502 502 503 503 @@ -379,20 +381,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -406,11 +408,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -424,11 +426,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -442,11 +444,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -473,20 +475,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select st1, st1.f1, 
st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -500,11 +502,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -518,11 +520,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -536,11 +538,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 diff --git ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out index 52b17cf..1873ed1cd 100644 --- ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out +++ ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out @@ -1043,11 +1043,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 7:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -1072,7 +1072,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1174,11 +1174,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 7:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -1203,7 +1203,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index 585794a..86668af 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -150,6 +150,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -293,6 +295,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -436,6 +440,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index c1af979..074a8d1 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 29e93c0..58ed02e 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -288,10 +288,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index e9ea461..fe83a8c 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -172,10 +172,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -197,7 +197,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -386,10 +386,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -411,7 +411,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -800,10 +800,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: 
VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -825,7 +825,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1155,11 +1155,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 7:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1184,7 +1184,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1297,11 +1297,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 8:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1326,7 +1326,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -1439,11 +1439,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1468,7 +1468,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true 
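The specialized class names in these hunks follow one pattern: VectorGroupByHash, then a key category (LongKey, StringKey, MultiKey), then the single supported aggregation (CountStar, CountColumn, DuplicateReduction, LongSumColumn, ...), then Operator — one operator per (key kind, aggregation kind) pair, which is what lets each variant run native without the generic aggregator indirection. A hypothetical helper showing that composition; the enums and method are illustrative, not part of the patch:

    // Hypothetical: models the naming convention visible in the explain output;
    // the patch's real operator-selection code is not shown in this excerpt.
    final class NativeGroupByNaming {
      enum KeyKind {
        LONG("LongKey"), STRING("StringKey"), MULTI("MultiKey");
        final String part;
        KeyKind(String part) { this.part = part; }
      }

      enum AggKind {
        COUNT_STAR("CountStar"), COUNT_COLUMN("CountColumn"),
        DUPLICATE_REDUCTION("DuplicateReduction"), LONG_SUM("LongSumColumn"),
        LONG_MAX("LongMaxColumn"), DECIMAL64_MIN("Decimal64MinColumn");
        final String part;
        AggKind(String part) { this.part = part; }
      }

      // e.g. (STRING, COUNT_COLUMN) -> "VectorGroupByHashStringKeyCountColumnOperator"
      static String operatorClassName(KeyKind key, AggKind agg) {
        return "VectorGroupByHash" + key.part + agg.part + "Operator";
      }
    }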
usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1581,11 +1581,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1610,7 +1610,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index 5a83157..beec66f 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -178,10 +178,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 23:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -203,7 +203,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Map 4 @@ -377,11 +377,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyCountStarOperator groupByMode: HASH keyExpressions: col 10:binary - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bin (type: binary) @@ -406,7 +406,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index b4c0ea5..ebe904b 100644 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out 
@@ -162,6 +162,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out index 1ba0ab6..41616eb 100644 --- ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -123,6 +123,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) @@ -332,6 +334,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index ec6a786..e6b6137 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -77,11 +77,11 @@ STAGE PLANS: Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyLongSumColumnOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -106,7 +106,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -310,11 +310,11 @@ STAGE PLANS: Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyLongSumColumnOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT 
aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -339,7 +339,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index f0f5fe7..0147bcb 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -973,10 +973,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -998,7 +999,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1094,11 +1095,26 @@ STAGE PLANS: TableScan alias: orc_create_complex_n0 Statistics: Num rows: 13503 Data size: 29968544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:str:string, 1:mp:map, 2:lst:array, 3:strct:struct, 4:val:string, 5:ROW__ID:struct] Select Operator expressions: lst (type: array), strct (type: struct) outputColumnNames: lst, strct + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] Statistics: Num rows: 13503 Data size: 29968544 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByHashMultiKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 2:array, col 3:struct + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] keys: lst (type: array), strct (type: struct) mode: hash outputColumnNames: _col0, _col1 @@ -1107,15 +1123,29 @@ STAGE PLANS: key expressions: _col0 (type: array), _col1 (type: struct) sort order: ++ Map-reduce partition columns: _col0 (type: array), _col1 (type: struct) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:array, 1:struct + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values
IS true Statistics: Num rows: 13503 Data size: 29968544 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [2, 3] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1201,11 +1231,12 @@ STAGE PLANS: Group By Operator aggregations: count(val) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: str (type: string) @@ -1232,7 +1263,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1348,11 +1379,12 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH keyExpressions: col 6:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -1379,7 +1411,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1479,12 +1511,28 @@ STAGE PLANS: TableScan alias: orc_create_complex_n0 Statistics: Num rows: 13503 Data size: 46492296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:str:string, 1:mp:map, 2:lst:array, 3:strct:struct, 4:val:string, 5:ROW__ID:struct] Select Operator expressions: str (type: string), mp (type: map), lst (type: array), strct (type: struct), val (type: string) outputColumnNames: str, mp, lst, strct, val + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4] Statistics: Num rows: 13503 Data size: 46492296 Basic
stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(val) + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggregation: COUNT_COLUMN + groupByMode: HASH + keyExpressions: col 0:string, col 1:map, col 2:array, col 3:struct + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: str (type: string), mp (type: map), lst (type: array), strct (type: struct) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -1493,16 +1541,31 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:map, 2:array, 3:struct + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 4:bigint Statistics: Num rows: 13503 Data size: 46492296 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type MAP not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index ca8232e..5899043 100644 --- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1329,10 +1329,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 16:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ws_order_number (type: int) @@ -1356,7 +1357,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64]
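The DuplicateReduction variants above cover a GROUP BY with no aggregation expressions — including the first stage of the count-distinct rewrite in vector_count_distinct.q.out, whose second stage then counts the distinct keys. The hash table only has to remember that a key was seen, never accumulate a value, which is also why complex list/map/struct keys can participate once they are serialized into opaque key bytes (note the BinarySortableSerDe for keys condition on the ReduceSink). A toy model of that semantics under the serialized-key assumption — not the patch's actual hash-table implementation:

    import java.util.LinkedHashSet;
    import java.util.Set;

    // Toy model: a GROUP BY with no aggregates reduces to emitting each
    // distinct key once; keys are assumed to arrive pre-serialized (complex
    // types as binary-sortable byte strings, shown here as plain strings).
    final class DuplicateReduction {
      private final Set<String> seen = new LinkedHashSet<>();

      // True only the first time a key is observed; duplicates are dropped.
      boolean offer(String serializedKey) {
        return seen.add(serializedKey);
      }

      public static void main(String[] args) {
        DuplicateReduction dr = new DuplicateReduction();
        for (String key : new String[] {"a", "b", "a", "c", "b"}) {
          if (dr.offer(key)) {
            System.out.println(key); // prints a, b, c
          }
        }
      }
    }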
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1383,10 +1384,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out index be1776c..c9ab60c 100644 --- ql/src/test/results/clientpositive/llap/vector_data_types.q.out +++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out @@ -389,10 +389,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 12:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -414,7 +414,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index 6704cc3..0c5d6da 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -92,6 +92,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -276,6 +278,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -494,6 +498,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -697,6 +703,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index ca61dfc..f215813 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -596,6 +596,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -1218,6 +1220,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index 7762e1e..bd8f126 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -2388,6 +2388,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -3361,6 +3363,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3529,6 
+3533,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3783,6 +3789,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type DECIMAL for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3917,6 +3925,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type DECIMAL for max not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4047,10 +4057,11 @@ STAGE PLANS: Group By Operator aggregations: count(key) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:decimal(20,10)) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4073,7 +4084,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6498,6 +6509,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -7471,6 +7484,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums:
[0, 1, 2] keys: _col0 (type: int) @@ -7639,6 +7654,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -7889,10 +7906,10 @@ STAGE PLANS: Group By Operator aggregations: min(key) Group By Vectorization: - aggregators: VectorUDAFMinDecimal64(col 0:decimal(15,3)/DECIMAL_64) -> decimal(15,3)/DECIMAL_64 - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDecimal64MinColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7915,7 +7932,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8023,10 +8040,10 @@ STAGE PLANS: Group By Operator aggregations: max(key) Group By Vectorization: - aggregators: VectorUDAFMaxDecimal64(col 0:decimal(15,3)/DECIMAL_64) -> decimal(15,3)/DECIMAL_64 - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDecimal64MaxColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8049,7 +8066,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8157,10 +8174,11 @@ STAGE PLANS: Group By Operator aggregations: count(key) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:decimal(15,3)/DECIMAL_64) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8183,7 +8201,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out
ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out index e72e398..026cb45 100644 --- ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out @@ -144,10 +144,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 195620 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) @@ -171,7 +172,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out index 3ea544e..235dc29 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out @@ -145,11 +145,11 @@ STAGE PLANS: Group By Operator aggregations: max(b) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 3:bigint) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongMaxColumnOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) @@ -174,7 +174,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 2319e11..d3e792d 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -64,6 +64,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -205,6 +207,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, 
ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -372,6 +376,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -640,6 +646,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -1004,6 +1012,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -1039,6 +1049,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index c7b3659..3b7a334 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or 
Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -238,6 +240,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -400,6 +404,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -556,6 +562,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -712,6 +720,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -875,6 +885,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out index ef10ace..031f9e5 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out +++ 
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -79,6 +79,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -285,6 +287,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -501,6 +505,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -613,11 +619,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col2 (type: bigint) @@ -810,6 +817,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -922,11 +931,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true,
hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col2 (type: bigint) @@ -1115,6 +1125,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1445,6 +1457,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1768,6 +1782,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1938,6 +1954,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -2010,11 +2028,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col2 (type: bigint) @@ -2159,6 +2178,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet:
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index 214b865..08331bf 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -266,6 +268,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out index 6bb3c10..cf91585 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -94,6 +94,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -260,6 +262,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -426,6 +430,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, 
ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -592,6 +598,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -752,6 +760,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -907,10 +917,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -935,7 +946,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1050,11 +1061,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSingleKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: double) @@ -1081,7 +1093,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index 7bee405..0911e6c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -75,11 +75,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string) @@ -106,7 +107,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -260,11 +261,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string) @@ -291,7 +293,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -672,11 +674,11 @@ STAGE PLANS: Group By Operator aggregations: sum(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string) @@ -703,7 +705,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext:
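Taken together, the nativeConditionsMet and nativeConditionsNotMet annotations in these .q.out files spell out a single eligibility predicate. The sketch below restates it in plain Java for reference; it is a reconstruction from the explain strings only, not the patch's actual validator, and the names (NativeGroupByCheck, AggKind, canUseNativeGroupBy) are hypothetical.

import java.util.Arrays;
import java.util.List;

final class NativeGroupByCheck {
  enum AggKind { COUNT_STAR, COUNT_COLUMN, LONG_MIN_MAX_SUM, DECIMAL_64_MIN_MAX_SUM, OTHER }

  static boolean canUseNativeGroupBy(boolean flagEnabled, String engine,
      boolean hashMode, boolean hasGroupingSets, List<AggKind> aggregates) {
    if (!flagEnabled) {
      return false;  // hive.vectorized.execution.groupby.native.enabled IS true
    }
    if (!Arrays.asList("tez", "spark").contains(engine)) {
      return false;  // hive.execution.engine tez IN [tez, spark] IS true
    }
    if (!hashMode) {
      return false;  // Group By Mode HASH IS true
    }
    if (hasGroupingSets) {
      return false;  // No Grouping Sets IS true (cube/rollup plans stay row-mode)
    }
    // "Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM}
    // {LONG|DECIMAL_64} aggregate": a keys-only group by is duplicate
    // reduction; otherwise exactly one supported aggregate is allowed.
    if (aggregates.isEmpty()) {
      return true;
    }
    if (aggregates.size() != 1) {
      return false;
    }
    AggKind only = aggregates.get(0);
    return only == AggKind.COUNT_STAR || only == AggKind.COUNT_COLUMN
        || only == AggKind.LONG_MIN_MAX_SUM || only == AggKind.DECIMAL_64_MIN_MAX_SUM;
  }
}

Under this reading, the sum(_col2) hunk above goes native because VectorUDAFSumLong is the one LONG SUM aggregate in the operator, while the grouping-set plans in the surrounding files keep native: false solely on the No Grouping Sets condition.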
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out index 50f9ad3..e88a46a 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out @@ -87,6 +87,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -230,6 +232,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -399,6 +403,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out index 4b736dd..dba59e1 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -89,6 +89,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -341,6 +343,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] 
keys: a (type: string), b (type: string), 0L (type: bigint) @@ -619,11 +623,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string) @@ -650,7 +655,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out index ff43724..bbc0143 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -76,10 +76,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -104,7 +105,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -149,6 +150,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -271,10 +274,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate,
Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -299,7 +303,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -344,6 +348,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -493,10 +499,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -521,7 +528,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -561,11 +568,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out index 946abaf..5be706f 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -80,6 +80,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or
Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -226,6 +228,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out index ab860c3..5f81555 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -80,6 +80,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -244,6 +246,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -415,6 +419,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -587,6 +593,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: 
bigint) @@ -796,6 +804,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -960,6 +970,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1131,6 +1143,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1296,6 +1310,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1502,10 +1518,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1530,7 +1547,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1661,10 +1678,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: 
VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1689,7 +1707,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1822,10 +1840,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1850,7 +1869,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1978,6 +1997,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2147,6 +2168,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2316,6 +2339,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: 
bigint) @@ -2480,6 +2505,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out index a629617..b714fc8 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -89,6 +89,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -303,6 +305,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -517,6 +521,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -729,6 +735,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -931,10 +939,11 @@ STAGE PLANS: native: true Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH 
keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -960,7 +969,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1125,11 +1134,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSingleKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: double) @@ -1157,7 +1167,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index 1678546..43bd2f7 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -78,6 +78,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: category (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index a9180bc..a02a67c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -138,6 +138,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -153,10 +155,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator Group By
Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_multikey.q.out ql/src/test/results/clientpositive/llap/vector_groupby_multikey.q.out new file mode 100644 index 0000000..a62d283 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_multikey.q.out @@ -0,0 +1,2501 @@ +PREHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a.key1 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 EXPRESSION [] +POSTHOOK: Lineage: groupby_multi_1a.key1 EXPRESSION [] +PREHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: 
groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: Lineage: groupby_multi_1a_nonull.key0 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a_nonull.key1 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: 
Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +NULL NULL 2 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashMultiKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: 
Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +NULL NULL +PREHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 
+2268-07-27 43 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_multi_1a_nonull + Statistics: Num rows: 55 Data size: 3240 
Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashMultiKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), 
KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, 
+ d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string), ts (type: timestamp) + outputColumnNames: bo, s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: s (type: string), bo (type: boolean) + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 7:string, col 6:boolean + native: true + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value 
expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice 
davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string) + outputColumnNames: bo, s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: s (type: string), bo (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 7:string, col 6:boolean + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), d (type: double), ts (type: timestamp) + outputColumnNames: si, d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: ts (type: timestamp), si (type: smallint) + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp, col 1:smallint + native: true + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan 
Vectorization: + native: true + Select Operator + expressions: si (type: smallint), ts (type: timestamp) + outputColumnNames: si, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: ts (type: timestamp), si (type: smallint) + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp, col 1:smallint + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE 
Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: f (type: float), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: f, dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: dec (type: decimal(4,2)), bin (type: binary) + Statistics: Num rows: 1 Data 
size: 260 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64, col 10:binary + native: true + Group By Operator + aggregations: count(f) + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: dec (type: decimal(4,2)), bin (type: binary) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64, col 10:binary + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: si, i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: i (type: int), b (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int, col 3:bigint + native: true + Group By Operator + aggregations: count(si) + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink 
Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 
+PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: i (type: int), b (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int, col 3:bigint + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: 
[DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: 
query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: i (type: int), b (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int, col 3:bigint + native: true + Group By Operator + Group By Vectorization: + className: VectorGroupByHashMultiKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 
Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 +65536 4294967307 +65536 4294967308 +65536 4294967312 +65536 4294967317 +65536 4294967320 +65536 4294967326 +65536 4294967334 +65536 4294967336 +65536 4294967338 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index 3696cad..ef17e91 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -281,10 +281,11 @@ STAGE PLANS: native: true Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -309,7 +310,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: 
false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -484,10 +485,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -511,7 +513,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -785,6 +787,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_item_sk (type: int) @@ -851,6 +855,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: true (type: boolean) @@ -1013,6 +1019,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index 6109a65..256cee8 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 
0L (type: bigint) @@ -339,6 +341,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -695,6 +699,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -730,6 +736,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_singlekey.q.out ql/src/test/results/clientpositive/llap/vector_groupby_singlekey.q.out new file mode 100644 index 0000000..5787b2a --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_singlekey.q.out @@ -0,0 +1,12103 @@ +PREHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key 
SIMPLE [(groupby_long_1a_txt)groupby_long_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SIMPLE [(groupby_long_1a_nonull_txt)groupby_long_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a_nonull values (1000) +PREHOOK: type: 
QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SIMPLE [(groupby_long_1b_txt)groupby_long_1b_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1b values (32030) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (32030) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull_txt 
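Every table in this new golden file is built with the same four-step pattern visible in the surrounding records: stage a delimited text table, LOAD the data file into it, convert to ORC with a CTAS, then add a few rows with direct inserts. A condensed HiveQL sketch of that pattern, with hypothetical names (my_keys, my_keys.txt) standing in for the per-case tables and data files:

-- Hypothetical names; mirrors the setup recorded in this q.out file.
CREATE TABLE my_keys_txt(key bigint)
row format delimited fields terminated by ',';
LOAD DATA LOCAL INPATH '../../data/files/my_keys.txt' OVERWRITE INTO TABLE my_keys_txt;
-- CTAS to ORC, so the ORC vectorized input path is exercised
-- (the plans below show hive.vectorized.use.vectorized.input.format IS true).
CREATE TABLE my_keys STORED AS ORC AS SELECT * FROM my_keys_txt;
-- Direct inserts after the CTAS add rows beyond the loaded file,
-- including a NULL key for the variants that are not *_nonull.
insert into my_keys values (NULL);
insert into my_keys values (800);
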
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SIMPLE [(groupby_long_1b_nonull_txt)groupby_long_1b_nonull_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b_nonull values (31713) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (31713) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b_nonull values (34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: 
groupby_long_1c.b_string SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c.key SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: Lineage: groupby_long_1c_nonull.b_string SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c_nonull.key SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:key, 
type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + 
outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 2
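The two result sets above differ only in the NULL group: count(key) reports 0 while count(*) reports 2 (groupby_long_1a holds two NULL keys, one from the \N row in the data file and one from the later insert). That is standard SQL aggregate semantics rather than an artifact of the native vectorized operators: COUNT over a column skips NULL values, whereas COUNT(*) counts rows. A minimal standalone HiveQL sketch of the same behavior, using a hypothetical table null_demo that is not part of this patch:

-- Hypothetical table, for illustration only.
CREATE TABLE null_demo (key bigint);
insert into null_demo values (800), (NULL), (NULL);
-- count(key) skips the NULL values while count(*) counts the rows,
-- so the NULL group reports 0 and 2 respectively, matching the split
-- seen in the golden output above (row order may vary):
select key, count(key), count(*) from null_demo group by key;
-- 800   1  1
-- NULL  0  2
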
!= -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +NULL +PREHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: 
select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a_nonull + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data 
size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### 
+-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +1569543799237464101 +3313583664488247651 +968819023021777205 +PREHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +3313583664488247651 +968819023021777205 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + 
vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} 
{LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num 
rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +NULL +PREHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull 
+#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b_nonull + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: key, b_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b_string) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +9999 1 +NULL 3 +PREHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +9999 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: 
groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 8 Data size: 32 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +NULL +PREHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 
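Across the plans above, the same nativeConditionsMet list is satisfied and only the specialized operator differs with the aggregate: count(*) selects VectorGroupByHashLongKeyCountStarOperator, count(key) selects VectorGroupByHashLongKeyCountKeyOperator, count(b_string) selects VectorGroupByHashLongKeyCountColumnOperator, and a bare GROUP BY with no aggregate selects VectorGroupByHashLongKeyDuplicateReductionOperator. A minimal q-file sketch of how the switch could be exercised is below; the fallback behavior described in the comments is inferred from the "hive.vectorized.execution.groupby.native.enabled IS true" condition printed in these plans, not verified output:

-- Presumably, turning the flag off makes that native condition fail, so the
-- plan should fall back to the generic (non-native) VectorGroupByOperator.
set hive.vectorized.execution.groupby.native.enabled=false;
explain vectorization operator
select key, count(*) from groupby_long_1a group by key;
-- Restore the default before the remaining queries.
set hive.vectorized.execution.groupby.native.enabled=true;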
+PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c_nonull + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: 
true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +PREHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 +1928928239 +PREHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO 
TABLE groupby_decimal64_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: insert into groupby_decimal64_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1a values (333.33) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (333.33) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: 
CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp EXPRESSION [] +POSTHOOK: Lineage: groupby_decimal64_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into 
groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled 
IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:decimal(6,3)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:decimal(6,3) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(6,3) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:decimal(6,3) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +NULL +PREHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a_nonull + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:decimal(6,3)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:decimal(6,3) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(6,3) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:decimal(6,3) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + 
native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyCountKeyOperator + countAggregation: COUNT_KEY + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:decimal(8,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:decimal(8,2), VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, 
count(key) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyCountStarOperator + countAggregation: COUNT_STAR + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:decimal(8,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 +
includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:decimal(8,2), VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + 
Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: c_timestamp (type: timestamp), key (type: decimal(8,2)) + outputColumnNames: c_timestamp, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_timestamp) + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyCountColumnOperator + countAggregation: COUNT_COLUMN + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:decimal(8,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:decimal(8,2), VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type:
decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:decimal(8,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:decimal(8,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 
(type: decimal(8,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +NULL +PREHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 
+2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b_nonull + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: 
VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:decimal(8,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:decimal(8,2) + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + 
rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(8,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a 
+POSTHOOK: Lineage: groupby_string_1a.key SIMPLE [(groupby_string_1a_txt)groupby_string_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SIMPLE [(groupby_string_1a_nonull_txt)groupby_string_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] 
+PREHOOK: query: insert into groupby_string_1a_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b +POSTHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b +POSTHOOK: Lineage: groupby_string_1b.key SIMPLE [(groupby_string_1b_txt)groupby_string_1b_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: CREATE TABLE 
groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SIMPLE [(groupby_string_1b_nonull_txt)groupby_string_1b_nonull_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1b_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_txt +PREHOOK: Output: 
database:default +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_date SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c 
+POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: CREATE TABLE 
groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp 
EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 0 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: 
NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 3 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from 
groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By 
Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +NULL +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +QNCYBDW +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull 
+#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a_nonull + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: 
+ enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +MXGDMBD +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE 
Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 0 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select 
Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 
Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +MXGD +NULL +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b_nonull + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +MXGD +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: 
hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 0 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain 
vectorization operator +select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 6 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date) + outputColumnNames: key, s_date + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_date) + Group By Vectorization: + className: VectorGroupByHashStringKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE 
+ Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5402 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 5 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY 
+PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_timestamp (type: timestamp) + outputColumnNames: key, s_timestamp + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_timestamp) + Group By Vectorization: + className: VectorGroupByHashStringKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator 
+ native: false + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 4 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH 
+FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +NULL +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from 
groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c_nonull + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + 
className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 3590 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a +POSTHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a +POSTHOOK: Lineage: groupby_serialize_1a.key SIMPLE [(groupby_serialize_1a_txt)groupby_serialize_1a_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields 
terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: Lineage: groupby_serialize_1a_nonull.key SIMPLE [(groupby_serialize_1a_nonull_txt)groupby_serialize_1a_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b +POSTHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b +POSTHOOK: Lineage: groupby_serialize_1b.c_double SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_smallint SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_string SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_string, 
type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.key SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_double SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_smallint SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_string SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.key SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + 
Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 
22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + 
Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSingleKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +NULL +PREHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select 
key, count(*) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a_nonull + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSingleKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 
+2898-10-01 22:27:02.000871113 +PREHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 
05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 
22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_smallint (type: smallint) + outputColumnNames: key, c_smallint + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_smallint) + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: 
true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 
08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_string (type: string) + outputColumnNames: key, c_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_string) + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: 
VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 
+2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSingleKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +NULL +PREHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2145-10-15 
06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 
09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 
+2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 
+2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 
+2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b_nonull + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSingleKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Top N 
Key Operator + sort order: + + keys: s (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 7:string + native: true + Group By Operator + aggregations: count(s) + Group By Vectorization: + className: VectorGroupByHashStringKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + 
outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string), ts (type: timestamp) + outputColumnNames: s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: s (type: string) + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 7:string + native: true + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashStringKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(ts) from over10k 
group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: s (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 7:string + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + 
usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: ts (type: timestamp) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp + native: true + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: d (type: double), ts (type: timestamp) + outputColumnNames: d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: 
COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: ts (type: timestamp) + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp + native: true + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: ts (type: timestamp) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: dec (type: decimal(4,2)) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64 + native: true + Group By Operator + aggregations: count(dec) + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain 
vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: dec (type: decimal(4,2)) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64 + native: true + Group By Operator + aggregations: count(bin) + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + 
aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + 
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: dec (type: decimal(4,2)) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64 + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value 
expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: i (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int + native: true + Group By Operator + aggregations: count(i) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate 
Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: i (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int + native: true + Group By Operator + aggregations: count(b) + Group By Vectorization: + className: VectorGroupByHashLongKeyCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: i (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeyCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: i (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: 
VectorTopNKeyOperator + keyExpressions: col 2:int + native: true + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false 
+ File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 +65537 +65538 +65539 +65540 +65541 +65542 +65543 +65544 +65545 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out index 8d93ddf..5247aad 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out @@ -79,10 +79,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -522,10 +523,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSingleKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -550,7 +552,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -591,10 +593,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:double) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -710,10 +713,11
@@ STAGE PLANS: Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: true (type: boolean) @@ -738,7 +742,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -785,10 +789,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out index 64292e0..7b8ca25 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out @@ -88,10 +88,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index 78de680..ed35ecd 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -174,6 +174,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: s_store_id (type: string), 0L (type:
bigint) @@ -309,6 +311,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index 53d5d8b..9c95dd0 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -332,10 +332,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -360,7 +361,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 9eb0893..698de87 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -145,10 +145,10 @@ STAGE PLANS: Group By Operator aggregations: sum(hash(_col2,_col3)) Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -171,7 +171,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true rowBatchContext: @@ -301,10 +301,10 @@ STAGE PLANS: Group By Operator aggregations: sum(hash(_col2,_col3)) Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 5:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -327,7 +327,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true rowBatchContext: @@ -544,10 +544,10 @@ STAGE PLANS: Group By Operator aggregations: sum(hash(_col2,_col3)) Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -570,7 +570,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true rowBatchContext: @@ -958,10 +958,10 @@ STAGE PLANS: Group By Operator aggregations: sum(hash(_col2,_col3)) Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_join_filters.q.out ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index a8f06eb..949a0d0 100644 --- ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -157,9 +157,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH mode: hash outputColumnNames: _col0 @@ -402,9 +403,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, 
Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH mode: hash outputColumnNames: _col0 @@ -425,7 +427,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 3 @@ -575,9 +577,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH mode: hash outputColumnNames: _col0 @@ -598,7 +601,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 3 @@ -748,9 +751,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH mode: hash outputColumnNames: _col0 @@ -771,7 +775,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 3 @@ -920,9 +924,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH mode: hash outputColumnNames: _col0 @@ -943,7 +948,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out index 67108fe..bf02de6 100644 --- ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out @@ -150,9 +150,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator 
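The VectorGroupByHashMultiKeyLongSumColumnOperator entries here replace the generic VectorUDAFSumLong aggregator with a class specialized for a single SUM over a long column. Below is a minimal sketch of the per-key state such a specialization implies, reduced to a scalar row-at-a-time interface; the class and method names are illustrative only, not Hive's actual implementation, which operates on whole VectorizedRowBatch columns.

    import java.util.HashMap;
    import java.util.Map;

    // Illustrative sketch only: the per-key state implied by a hash GROUP BY
    // whose sole aggregate is SUM over a long column. A specialized operator
    // can keep a bare long per key instead of a generic aggregation buffer.
    final class LongSumByKeySketch {
        private final Map<Long, Long> sums = new HashMap<>();

        // Accumulate one (key, value) pair; null handling, overflow checks,
        // and batch-at-a-time processing are omitted for brevity.
        void accumulate(long key, long value) {
            sums.merge(key, value, Long::sum);
        }

        Long sumFor(long key) {
            return sums.get(key);
        }
    }
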
groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH mode: hash outputColumnNames: _col0 @@ -385,9 +386,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH mode: hash outputColumnNames: _col0 @@ -408,7 +410,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 3 @@ -548,9 +550,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH mode: hash outputColumnNames: _col0 @@ -571,7 +574,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 3 @@ -711,9 +714,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH mode: hash outputColumnNames: _col0 @@ -734,7 +738,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 3 @@ -875,9 +879,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH 
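Every converted operator in these plans prints the same five-clause nativeConditionsMet list, and the vector_grouping_sets.q.out hunks earlier show the one failing clause reported under nativeConditionsNotMet instead. A hedged sketch of how such a gate could be evaluated and rendered follows; every name below is a hypothetical stand-in, not the vectorizer's real validation code.

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical sketch of the five-way gate these plans report as
    // nativeConditionsMet / nativeConditionsNotMet.
    final class NativeGroupByGateSketch {
        final List<String> met = new ArrayList<>();
        final List<String> notMet = new ArrayList<>();

        NativeGroupByGateSketch(boolean flagEnabled, String engine,
                                boolean singleSupportedAggregate,
                                boolean hashMode, boolean hasGroupingSets) {
            note(flagEnabled, "hive.vectorized.execution.groupby.native.enabled");
            note("tez".equals(engine) || "spark".equals(engine),
                    "hive.execution.engine " + engine + " IN [tez, spark]");
            note(singleSupportedAggregate,
                    "Single COUNT aggregate, Duplicate Reduction, or Single "
                            + "{MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate");
            note(hashMode, "Group By Mode HASH");
            note(!hasGroupingSets, "No Grouping Sets");
        }

        // Satisfied conditions render as "<description> IS true" under met;
        // failed ones as "<description> IS false" under notMet. Conversion to
        // a native class happens only when notMet stays empty.
        private void note(boolean ok, String description) {
            (ok ? met : notMet).add(description + (ok ? " IS true" : " IS false"));
        }

        boolean nativeEligible() {
            return notMet.isEmpty();
        }
    }
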
mode: hash outputColumnNames: _col0 @@ -898,7 +903,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out index 0999b5d..e0417bd 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out @@ -82,7 +82,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 31d7074..03f818b 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -6175,9 +6175,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6191,7 +6192,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6295,9 +6296,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6311,7 +6313,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6417,9 +6419,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping 
Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6433,7 +6436,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6534,9 +6537,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6550,7 +6554,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6659,9 +6663,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6675,7 +6680,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6748,9 +6753,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6764,7 +6770,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -6868,9 +6874,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} 
{LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6884,7 +6891,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -6985,9 +6992,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -7001,7 +7009,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -7132,9 +7140,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -7148,7 +7157,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -7265,9 +7274,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -7281,7 +7291,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -7416,9 +7426,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single 
COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -7432,7 +7443,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -7546,9 +7557,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator @@ -7562,7 +7574,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -7682,9 +7694,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -7698,7 +7711,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -7712,9 +7725,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -7728,7 +7742,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -7869,9 +7883,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -7885,7 +7900,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -8053,9 +8068,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -8078,7 +8094,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -8294,9 +8310,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -8319,7 +8336,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -8460,9 +8477,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -8476,7 +8494,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -8627,9 +8645,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH 
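The vector_leftsemi_mapjoin.q.out plans convert keys-only GROUP BY operators, which carry no aggregation functions at all, into the DuplicateReduction variants seen here. Conceptually that is plain deduplication; the toy sketch below shows the idea over bare long keys (the real operator consumes VectorizedRowBatch columns through a specialized hash table).

    import java.util.HashSet;
    import java.util.Set;

    // Toy sketch of "Duplicate Reduction": a GROUP BY that has keys but no
    // aggregate functions only needs to forward each distinct key once.
    final class LongKeyDuplicateReductionSketch {
        private final Set<Long> seen = new HashSet<>();

        // Returns true exactly once per distinct key, i.e. when the row
        // should be forwarded downstream.
        boolean offerKey(long key) {
            return seen.add(key);
        }
    }
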
- native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -8643,7 +8662,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -8811,9 +8830,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -8827,7 +8847,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -8946,9 +8966,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -8962,7 +8983,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -9117,9 +9138,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkStringOperator @@ -9133,7 +9155,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true @@ -9252,9 +9274,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: 
VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -9277,7 +9300,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -9431,9 +9454,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -9456,7 +9480,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -9612,9 +9636,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -9637,7 +9662,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -9792,9 +9817,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col1 (type: int), _col1 (type: int) mode: hash @@ -9817,7 +9843,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ 
-9976,9 +10002,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -10001,7 +10028,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -10102,9 +10129,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -10127,7 +10155,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -10285,9 +10313,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -10310,7 +10339,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -10465,9 +10494,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -10490,7 +10520,7 @@ STAGE PLANS: inputFormatFeatureSupport: 
[DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -10693,9 +10723,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -10718,7 +10749,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -10885,9 +10916,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -10910,7 +10942,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -11124,9 +11156,10 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -11149,7 +11182,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -11313,9 +11346,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode 
HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -11338,7 +11372,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -11519,9 +11553,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -11544,7 +11579,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -11570,9 +11605,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -11595,7 +11631,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -11804,9 +11840,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -11829,7 +11866,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -12011,9 +12048,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -12036,7 +12074,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -12253,9 +12291,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -12278,7 +12317,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -12508,9 +12547,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -12533,7 +12573,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -12743,9 +12783,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -12768,7 +12809,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -13009,9 +13050,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: 
VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -13034,7 +13076,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -13290,9 +13332,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -13315,7 +13358,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -13535,9 +13578,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: string) mode: hash @@ -13560,7 +13604,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true @@ -13679,9 +13723,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -13704,7 +13749,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -13855,9 
+13900,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -13880,7 +13926,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -14033,9 +14079,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -14058,7 +14105,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -14210,9 +14257,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col1 (type: int), _col1 (type: int) mode: hash @@ -14235,7 +14283,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -14391,9 +14439,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -14416,7 +14465,7 @@ STAGE PLANS: inputFormatFeatureSupport: 
[DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -14517,9 +14566,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -14542,7 +14592,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -14697,9 +14747,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -14722,7 +14773,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -14874,9 +14925,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -14899,7 +14951,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -15096,9 +15148,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group 
By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -15121,7 +15174,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -15285,9 +15338,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -15310,7 +15364,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -15518,9 +15572,10 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -15543,7 +15598,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -15704,9 +15759,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -15729,7 +15785,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -15904,9 +15960,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -15929,7 +15986,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -15955,9 +16012,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -15980,7 +16038,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -16183,9 +16241,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -16208,7 +16267,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -16390,9 +16449,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -16415,7 +16475,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -16631,9 +16691,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - 
className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -16656,7 +16717,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -16879,9 +16940,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -16904,7 +16966,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -17111,9 +17173,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -17136,7 +17199,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -17374,9 +17437,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -17399,7 +17463,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ 
-17649,9 +17713,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -17674,7 +17739,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -17891,9 +17956,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: string) mode: hash @@ -17916,7 +17982,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true @@ -18036,9 +18102,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -18061,7 +18128,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -18213,9 +18280,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -18238,7 +18306,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] 
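
The recurring hunk above — VectorGroupByOperator replaced by VectorGroupByHashLongKeyDuplicateReductionOperator (or its MultiKey/StringKey siblings), with native and allNative flipping to true — records the planner choosing a specialized operator for hash-mode GROUP BYs that maintain no aggregate values, so processing a batch reduces to de-duplicating the keys. A minimal sketch of that idea for a single long key column follows; the class and its API are illustrative only, not Hive's implementation:

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

final class LongKeyDuplicateReductionSketch {
  private final Set<Long> seenKeys = new HashSet<>();

  // Consumes one batch's key vector and returns only the keys not seen before,
  // which is all a "duplicate reduction" group-by has to forward downstream.
  long[] processBatch(long[] keyVector, int size) {
    long[] out = new long[size];
    int n = 0;
    for (int i = 0; i < size; i++) {
      if (seenKeys.add(keyVector[i])) { // add() returns false for duplicates
        out[n++] = keyVector[i];
      }
    }
    return Arrays.copyOf(out, n);
  }
}
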
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -18392,9 +18460,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -18417,7 +18486,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -18570,9 +18639,10 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col1 (type: int), _col1 (type: int) mode: hash @@ -18595,7 +18665,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -18752,9 +18822,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -18777,7 +18848,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -18878,9 +18949,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS 
true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -18903,7 +18975,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -19059,9 +19131,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -19084,7 +19157,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -19237,9 +19310,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -19262,7 +19336,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -19461,9 +19535,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -19486,7 +19561,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -19651,9 +19726,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
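
Each nativeConditionsMet list in these plans repeats the same five checks: the hive.vectorized.execution.groupby.native.enabled flag, an execution engine of tez or spark, a single COUNT / duplicate-reduction / single MAX|MIN|SUM over LONG or DECIMAL_64 aggregate, HASH group-by mode, and no grouping sets. A hedged sketch of that conjunction, assuming a simplified aggregate classification (the real test lives in Hive's Vectorizer and also emits the met/not-met strings shown here):

final class NativeGroupByEligibilitySketch {
  enum AggKind { NONE, SINGLE_COUNT, SINGLE_MAX_MIN_SUM_LONG_OR_DECIMAL64, OTHER }

  static boolean isNativeEligible(boolean flagEnabled,   // hive.vectorized.execution.groupby.native.enabled
                                  String engine,         // hive.execution.engine
                                  AggKind aggKind,
                                  boolean hashMode,      // Group By Mode HASH
                                  boolean hasGroupingSets) {
    boolean engineOk = engine.equals("tez") || engine.equals("spark");
    boolean aggOk = aggKind == AggKind.SINGLE_COUNT
        || aggKind == AggKind.NONE // duplicate reduction: no aggregates at all
        || aggKind == AggKind.SINGLE_MAX_MIN_SUM_LONG_OR_DECIMAL64;
    return flagEnabled && engineOk && aggOk && hashMode && !hasGroupingSets;
  }
}
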
hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -19676,7 +19752,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -19886,9 +19962,10 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -19911,7 +19988,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -20073,9 +20150,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -20098,7 +20176,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -20275,9 +20353,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -20300,7 +20379,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -20326,9 +20405,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + 
className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -20351,7 +20431,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -20556,9 +20636,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -20581,7 +20662,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -20763,9 +20844,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -20788,7 +20870,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -21005,9 +21087,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -21030,7 +21113,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -21256,9 +21339,10 @@ STAGE 
PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -21281,7 +21365,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -21489,9 +21573,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -21514,7 +21599,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -21753,9 +21838,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -21778,7 +21864,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -22030,9 +22116,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -22055,7 +22142,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -22273,9 +22360,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: string) mode: hash @@ -22298,7 +22386,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 3b8ee96..b98a720 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -47,10 +47,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -74,7 +75,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -146,10 +147,11 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -173,7 +175,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -325,10 +327,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: 
VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -352,7 +355,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -426,10 +429,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 17:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) @@ -453,7 +457,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_null_projection.q.out ql/src/test/results/clientpositive/llap/vector_null_projection.q.out index 6372587..2271c34 100644 --- ql/src/test/results/clientpositive/llap/vector_null_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_null_projection.q.out @@ -166,10 +166,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 2:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: true (type: boolean) @@ -193,7 +194,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -217,10 +218,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 2:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: true (type: boolean) @@ -244,7 +246,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index 0546346..c30a19e 100644 --- ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -150,6 +150,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -280,6 +282,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out index 1cf8eb8..c1ab392 100644 --- ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out +++ ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out @@ -255,7 +255,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out index 5921f0a..4f5bd7f 100644 --- ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out @@ -1115,11 +1115,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSingleKeyCountColumnOperator groupByMode: HASH keyExpressions: col 9:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: 
_col0 (type: double) @@ -1144,7 +1144,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1228,12 +1228,27 @@ STAGE PLANS: alias: nested_tbl_1 Pruned Column Paths: s1.f3 Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 10] + selectExpressions: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 8:struct, VectorUDFStructField(col 9:struct, col 0:int)(children: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 9:struct) -> 10:int Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountColumnOperator + groupByMode: HASH + keyExpressions: col 8:struct + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: struct) mode: hash outputColumnNames: _col0, _col1 @@ -1242,16 +1257,23 @@ STAGE PLANS: key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: _col0 (type: struct) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1322,12 +1344,27 @@ STAGE PLANS: alias: nested_tbl_1 Pruned Column Paths: s1.f3 Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 10] + selectExpressions: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 8:struct, VectorUDFStructField(col 9:struct, col 0:int)(children: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 9:struct) -> 10:int Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE Group By Operator 
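
The specialized class names appearing throughout these plans compose mechanically: VectorGroupByHash, then a key category (LongKey, StringKey, MultiKey, SingleKey), then an aggregate variant (DuplicateReduction, CountColumn, LongMaxColumn, LongSumColumn), then Operator. How Hive actually selects the variant is not shown in this diff; the concatenation below only illustrates the apparent naming scheme:

final class GroupByOperatorNameSketch {
  // keyCategory: "LongKey", "StringKey", "MultiKey", "SingleKey", ...
  // aggVariant:  "DuplicateReduction", "CountColumn", "LongMaxColumn", "LongSumColumn", ...
  static String className(String keyCategory, String aggVariant) {
    return "VectorGroupByHash" + keyCategory + aggVariant + "Operator";
  }

  public static void main(String[] args) {
    System.out.println(className("LongKey", "DuplicateReduction"));
    // -> VectorGroupByHashLongKeyDuplicateReductionOperator
    System.out.println(className("MultiKey", "LongSumColumn"));
    // -> VectorGroupByHashMultiKeyLongSumColumnOperator
  }
}
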
aggregations: count(_col1) + Group By Vectorization: + className: VectorGroupByHashSingleKeyCountColumnOperator + groupByMode: HASH + keyExpressions: col 8:struct + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: struct) mode: hash outputColumnNames: _col0, _col1 @@ -1336,16 +1373,23 @@ STAGE PLANS: key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: _col0 (type: struct) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1835,10 +1879,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int, col 12:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: boolean) @@ -1862,7 +1907,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -2310,11 +2355,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 11:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountColumnOperator groupByMode: HASH keyExpressions: col 10:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2339,7 +2384,7 @@ STAGE PLANS: inputFormatFeatureSupport: 
[DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2453,11 +2498,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountColumnOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2482,7 +2527,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2596,11 +2641,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 12:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountColumnOperator groupByMode: HASH keyExpressions: col 11:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2625,7 +2670,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2747,7 +2792,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + notVectorizedReason: exception: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo cannot be cast to org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo stack trace: org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getStructFieldIndex(VectorizationContext.java:915), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getGenericUDFStructField(VectorizationContext.java:903), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:883), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:764), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:752), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doVectorizeGroupByOperatorPreparation(Vectorizer.java:5341), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperator(Vectorizer.java:6019), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChild(Vectorizer.java:1043), 
org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChildren(Vectorizer.java:929), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperatorTree(Vectorizer.java:896), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.access$2300(Vectorizer.java:303), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:2162), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:2114), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapWork(Vectorizer.java:2087), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.convertMapWork(Vectorizer.java:1251), ... vectorized: false Reducer 2 Execution mode: llap @@ -2844,11 +2889,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 13:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountColumnOperator groupByMode: HASH keyExpressions: col 12:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2873,7 +2918,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2995,7 +3040,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + notVectorizedReason: exception: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo cannot be cast to org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo stack trace: org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getStructFieldIndex(VectorizationContext.java:915), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getGenericUDFStructField(VectorizationContext.java:903), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:883), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:764), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:752), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doVectorizeGroupByOperatorPreparation(Vectorizer.java:5341), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperator(Vectorizer.java:6019), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChild(Vectorizer.java:1043), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChildren(Vectorizer.java:929), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperatorTree(Vectorizer.java:896), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.access$2300(Vectorizer.java:303), 
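
Note the changed notVectorizedReason for the LIST-typed key in the two hunks above: the earlier golden output carried the targeted message "Vectorizing complex type LIST not supported", while the new output records a raw ClassCastException from VectorizationContext.getStructFieldIndex, i.e. a field lookup that assumes StructTypeInfo now receives a ListTypeInfo. A reduced sketch of that failure mode, using stand-in types rather than Hive's serde2 TypeInfo classes:

abstract class TypeInfoSketch {}
final class StructTypeInfoSketch extends TypeInfoSketch {
  int fieldIndex(String field) { return 0; }
}
final class ListTypeInfoSketch extends TypeInfoSketch {}

final class StructFieldIndexSketch {
  static int getStructFieldIndex(TypeInfoSketch t, String field) {
    // Unconditional cast: a LIST input throws ClassCastException here, which is
    // what the golden file now records instead of a targeted "not supported" message.
    return ((StructTypeInfoSketch) t).fieldIndex(field);
  }

  public static void main(String[] args) {
    getStructFieldIndex(new ListTypeInfoSketch(), "f1"); // ClassCastException
  }
}
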
org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:2162), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:2114), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapWork(Vectorizer.java:2087), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.convertMapWork(Vectorizer.java:1251), ... vectorized: false Reducer 2 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out index 50a5ada..502ac42 100644 --- ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out @@ -146,11 +146,11 @@ STAGE PLANS: Group By Operator aggregations: max(b) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 3:bigint) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyLongMaxColumnOperator groupByMode: HASH keyExpressions: col 7:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) @@ -175,7 +175,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 2953df5..0718611 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -760,6 +760,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index c5efa9f..3b4c5d1 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -326,6 +326,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out 
ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index fcde000..a5c20b0 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -291,6 +291,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[output column vector type DECIMAL for sum not implemented]" IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -512,6 +514,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> 3:decimal(15,2), ConvertDecimal64ToDecimal(col 1:decimal(15,2)/DECIMAL_64) -> 4:decimal(15,2) native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[output column vector type DECIMAL for sum not implemented]" IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) @@ -1621,6 +1625,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1842,6 +1848,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(7,2)/DECIMAL_64) -> 3:decimal(7,2), ConvertDecimal64ToDecimal(col 1:decimal(7,2)/DECIMAL_64) -> 4:decimal(7,2) native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out index 59ceb82..32496d4 100644 --- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out @@ -120,10 +120,10 @@ STAGE PLANS: Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single 
COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -145,7 +145,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -298,10 +298,10 @@ STAGE PLANS: Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -323,7 +323,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -476,10 +476,10 @@ STAGE PLANS: Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -501,7 +501,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -641,10 +641,10 @@ STAGE PLANS: Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -666,7 +666,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -806,10 +806,10 @@ STAGE PLANS: Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:int) -> bigint - className: VectorGroupByOperator + className: 
VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -831,7 +831,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 4711f35..0f675dc 100644 --- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -457,11 +457,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 2:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -486,7 +486,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1472,11 +1472,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -1501,7 +1501,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2511,11 +2511,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSingleKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:timestamp - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By 
Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_time (type: timestamp) @@ -2540,7 +2540,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3058,11 +3058,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 2:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -3087,7 +3087,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4073,11 +4073,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -4102,7 +4102,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5112,11 +5112,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSingleKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:timestamp - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_time (type: timestamp) @@ -5141,7 +5141,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out index a8ceb64..a9a6556 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out 
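Reviewer sketch: the vector_ptf_1.q.out hunk below is the typical non-qualifying shape — the hash group by carries aggregates outside the single-COUNT / duplicate-reduction / single-{MAX|MIN|SUM}-over-{LONG|DECIMAL_64} set, so it stays on VectorGroupByOperator and only gains the new nativeConditionsMet/nativeConditionsNotMet diagnostics. A minimal illustration of the two shapes, assuming a hypothetical table t(key STRING, cnt BIGINT) rather than any actual test table:

-- Hypothetical table t(key STRING, cnt BIGINT); illustration only.
SET hive.vectorized.execution.groupby.native.enabled=true;

-- Qualifies: exactly one SUM over a LONG-family value, HASH mode, no grouping
-- sets; planned as a native VectorGroupByHash...LongSumColumnOperator variant.
EXPLAIN VECTORIZATION DETAIL
SELECT key, SUM(cnt) FROM t GROUP BY key;

-- Does not qualify: two aggregates; falls back to VectorGroupByOperator with
-- nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or
-- Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false.
EXPLAIN VECTORIZATION DETAIL
SELECT key, MIN(cnt), MAX(cnt) FROM t GROUP BY key;
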
@@ -90,6 +90,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: name (type: string), age (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out index 62887c4..b4ab100 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out @@ -81,6 +81,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type DECIMAL for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out index c9bcc2c..ad7df95 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out @@ -111,10 +111,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_retry_failure.q.out ql/src/test/results/clientpositive/llap/vector_retry_failure.q.out index 59a9196..c530de4 100644 --- ql/src/test/results/clientpositive/llap/vector_retry_failure.q.out +++ ql/src/test/results/clientpositive/llap/vector_retry_failure.q.out @@ -59,10 +59,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single 
{MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: int) @@ -86,7 +87,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out index a2589c7..6e27616 100644 --- ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out @@ -124,6 +124,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -335,6 +337,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index fc00ed7..8c5e4ea 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -371,10 +371,11 @@ STAGE PLANS: native: true Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 20:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -399,7 +400,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ql/src/test/results/clientpositive/llap/vector_topnkey.q.out index 4ba4d44..995d20c 100644 --- ql/src/test/results/clientpositive/llap/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/llap/vector_topnkey.q.out @@ -54,11 +54,11 @@ STAGE PLANS: Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 4:int) -> bigint - className: VectorGroupByOperator + className: 
VectorGroupByHashStringKeyLongSumColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -86,7 +86,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -251,10 +251,11 @@ STAGE PLANS: native: true Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) @@ -280,7 +281,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 8c2fb0c..7276586 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -2903,6 +2903,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -3049,6 +3051,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index 6932744..a25d6ac 100644 --- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -61,11 +61,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 7:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - 
native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -90,7 +90,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 8e8c445..48dec4e 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -275,6 +275,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, col 2:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type DOUBLE for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_name (type: string), p_mfgr (type: string), p_size (type: int) @@ -529,6 +531,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, col 2:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type DOUBLE for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_name (type: string), p_mfgr (type: string), p_size (type: int) @@ -4087,6 +4091,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, col 2:string, col 5:int, col 7:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) @@ -4794,6 +4800,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type DOUBLE for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) @@ -6375,6 +6383,8 @@ STAGE PLANS: groupByMode: HASH 
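Worth noting for the vector_windowing.q.out hunks here: even a lone MIN or SUM does not qualify when the value column is DOUBLE, because only LONG and DECIMAL_64 value types have native implementations; the plan then records Has issues "[input column vector type DOUBLE for min not implemented]" alongside the unmet aggregate condition. A rough sketch on the part test table used by these tests (p_size is an int; p_retailprice is assumed here to be a double):

-- Single MIN over an int (LONG-family) value: eligible for a native operator.
SELECT p_mfgr, MIN(p_size) FROM part GROUP BY p_mfgr;

-- Single MIN over a double value: stays on VectorGroupByOperator and reports
-- Has issues "[input column vector type DOUBLE for min not implemented]".
SELECT p_mfgr, MIN(p_retailprice) FROM part GROUP BY p_mfgr;
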
keyExpressions: col 1:string, col 2:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type DOUBLE for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_name (type: string), p_mfgr (type: string), p_size (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out index 5943548..2b696bb 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -49,11 +49,11 @@ STAGE PLANS: Group By Operator aggregations: sum(c_int) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyLongSumColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string) @@ -80,7 +80,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -299,6 +299,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: int) @@ -539,6 +541,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out index 98b2316..544bfc3 100644 --- ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -58,6 +58,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregate, Duplicate 
Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -242,6 +244,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -579,6 +583,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -763,6 +769,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1100,6 +1108,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1284,6 +1294,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type DOUBLE for sum not implemented]" IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1668,6 +1680,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_1.q.out ql/src/test/results/clientpositive/llap/vectorization_1.q.out index 59abc5d..6045f75 100644 --- ql/src/test/results/clientpositive/llap/vectorization_1.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_1.q.out @@ -92,6 +92,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: 
HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_12.q.out ql/src/test/results/clientpositive/llap/vectorization_12.q.out index 5a8d6fa..210587a 100644 --- ql/src/test/results/clientpositive/llap/vectorization_12.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_12.q.out @@ -116,6 +116,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out index 4ce654f..483d921 100644 --- ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -118,6 +118,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -476,6 +478,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out index 5f1c9be..b91c470 100644 --- ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -118,6 +118,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 552d889..9ff2aef 100644 --- ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -114,6 +114,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out index 4b2c454..900a5ce 100644 --- ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -91,6 +91,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorization_2.q.out ql/src/test/results/clientpositive/llap/vectorization_2.q.out index 919d1c9..fcfac59 100644 --- ql/src/test/results/clientpositive/llap/vectorization_2.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_2.q.out @@ -96,6 +96,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_3.q.out ql/src/test/results/clientpositive/llap/vectorization_3.q.out index b998692..77a51a0 100644 --- ql/src/test/results/clientpositive/llap/vectorization_3.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_3.q.out @@ -101,6 +101,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: 
false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_4.q.out ql/src/test/results/clientpositive/llap/vectorization_4.q.out index 1a04ec2..8112d6c 100644 --- ql/src/test/results/clientpositive/llap/vectorization_4.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_4.q.out @@ -96,6 +96,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_5.q.out ql/src/test/results/clientpositive/llap/vectorization_5.q.out index 5d70025..efe0614 100644 --- ql/src/test/results/clientpositive/llap/vectorization_5.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_5.q.out @@ -89,6 +89,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out index 4b2c454..900a5ce 100644 --- ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -91,6 +91,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 3dc640a..4f769d9 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -304,6 +304,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate 
Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -516,10 +518,11 @@ STAGE PLANS: native: true Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -546,7 +549,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -679,10 +682,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 5:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint), cdouble (type: double) @@ -708,7 +712,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -922,11 +926,11 @@ STAGE PLANS: Group By Operator aggregations: sum(ctinyint) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSingleKeyLongSumColumnOperator groupByMode: HASH keyExpressions: col 5:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) @@ -953,7 +957,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out index cb49ea5..08299b4 100644 --- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out @@ -44,10 +44,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) Group By Vectorization: - aggregators: 
VectorUDAFSumLong(col 13:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongSumColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -70,7 +70,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 7f1c6a2..ea2ec17 100644 --- ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -123,6 +123,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -390,6 +392,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] mode: hash @@ -649,6 +653,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash @@ -887,6 +893,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash @@ -2247,6 +2255,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:smallint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col0 (type: smallint) @@ -2529,6 +2539,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: _col0 (type: double) @@ -2855,6 +2867,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:timestamp, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: timestamp), _col1 (type: string) @@ -3262,6 +3276,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: boolean) @@ -3500,10 +3516,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3525,7 +3541,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3619,10 +3635,10 @@ STAGE PLANS: Group By Operator aggregations: count(i) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3644,7 +3660,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: 
[DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3810,10 +3826,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3835,7 +3851,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3929,10 +3945,10 @@ STAGE PLANS: Group By Operator aggregations: count(ctinyint) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:tinyint) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3954,7 +3970,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4048,10 +4064,10 @@ STAGE PLANS: Group By Operator aggregations: count(cint) Group By Vectorization: - aggregators: VectorUDAFCount(col 2:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4073,7 +4089,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4167,10 +4183,10 @@ STAGE PLANS: Group By Operator aggregations: count(cfloat) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:float) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true 
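The run of vectorization_short_regress.q.out hunks around this point makes the COUNT rule concrete: a single COUNT qualifies regardless of the counted column's type — count(ctinyint) through count(cboolean1) all switch to VectorGroupByHashMultiKeyCountColumnOperator, and count() to the CountStar variant, taking the map side to allNative: true. In spirit, on the alltypesorc test table:

-- Each query below is a single COUNT aggregate with no grouping sets, so the
-- whole map-side pipeline vectorizes natively (allNative: true in the hunks).
SELECT COUNT(*)        FROM alltypesorc;
SELECT COUNT(ctinyint) FROM alltypesorc;
SELECT COUNT(cfloat)   FROM alltypesorc;
SELECT COUNT(cstring1) FROM alltypesorc;
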
vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4192,7 +4208,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4286,10 +4302,10 @@ STAGE PLANS: Group By Operator aggregations: count(cstring1) Group By Vectorization: - aggregators: VectorUDAFCount(col 6:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4311,7 +4327,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4405,10 +4421,10 @@ STAGE PLANS: Group By Operator aggregations: count(cboolean1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:boolean) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4430,7 +4446,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out index b0e0d70..baf95f1 100644 --- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out @@ -1282,6 +1282,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index 0cffc4e..153b355 100644 --- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -77,6 +77,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez 
IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -207,10 +209,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -235,7 +238,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -288,6 +291,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index feecc27..20f164a 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -98,7 +98,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -334,7 +334,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -685,7 +685,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 6 @@ -729,7 +729,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1168,7 +1168,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1512,7 +1512,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: 
[DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1854,7 +1854,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2028,7 +2028,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2533,7 +2533,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2678,7 +2678,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3055,7 +3055,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3229,7 +3229,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3358,7 +3358,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -3537,7 +3537,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3693,7 +3693,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 6 @@ -3737,7 +3737,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4637,7 +4637,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 10 @@ -4693,7 +4693,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 7 @@ -5000,7 +5000,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true 
usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5044,7 +5044,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5191,7 +5191,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5235,7 +5235,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -5279,7 +5279,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5414,7 +5414,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5473,7 +5473,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5605,7 +5605,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5649,7 +5649,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5770,7 +5770,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5814,7 +5814,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5935,7 +5935,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5979,7 +5979,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6102,7 +6102,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6272,7 +6272,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: 
[DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -6316,7 +6316,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6421,7 +6421,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -6576,7 +6576,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 3 @@ -6687,7 +6687,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -6731,7 +6731,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -6775,7 +6775,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6963,7 +6963,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 4 @@ -7082,7 +7082,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index 89986fb..09d88be 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -143,6 +143,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -386,6 +388,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS 
false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -629,6 +633,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -873,6 +879,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -944,6 +952,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -1219,6 +1229,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -1247,6 +1259,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -1519,6 +1533,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index 5cd15cb..b2fa534 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -361,6 +361,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 5cc42e3..b033c19 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -81,6 +81,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index 4013b36..3db7733 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -212,10 +212,10 @@ STAGE PLANS: Group By Operator aggregations: max(int_col_0) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:int) -> int - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongMaxColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -238,7 +238,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -423,10 +423,10 @@ STAGE PLANS: Group By Operator aggregations: max(int_col_0) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:int) -> int - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyLongMaxColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -449,7 +449,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -634,10 +634,10 @@ STAGE PLANS: Group By Operator aggregations: max(int_col_0) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:int) -> int - className: VectorGroupByOperator + 
className: VectorGroupByHashMultiKeyLongMaxColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -660,7 +660,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index 7a3c714..1541758 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -310,6 +310,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: _col0 (type: tinyint) diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 56e81aa..591da4d 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -3818,6 +3818,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type DOUBLE for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 69b3d00..c1c1c5c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -154,6 +154,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -387,6 +389,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode 
HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -531,6 +535,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 4373e9a..457a572 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -1028,6 +1028,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1159,6 +1161,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[input column vector type TIMESTAMP for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1308,6 +1312,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single {MAX|MIN|SUM} {LONG|DECIMAL_64} aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index 55998ae..10979c7 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -56,6 +56,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -214,6 +216,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine 
spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -524,6 +528,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -682,6 +688,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -992,6 +1000,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1150,6 +1160,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1507,6 +1519,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out index e6cd26c..03e4ac6 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out index 2a2e1a1..cc10b53 100644 --- 
ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out @@ -114,6 +114,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out index 3812239..a6db9b7 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out @@ -116,6 +116,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -449,6 +451,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out index 4f6c9a5..3a41cff 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out @@ -116,6 +116,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out index 0b27779..8c0fa0b 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out +++ 
ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out index 9876ce5..209e7af 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out @@ -89,6 +89,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out index 34ec44c..63dab39 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out @@ -94,6 +94,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out index bf22f4c..612cb03 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out @@ -99,6 +99,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out index 3d9a5cf..3cd5d25 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out +++ 
ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out @@ -94,6 +94,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out index 022f16c..590de3f 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out @@ -87,6 +87,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out index 9876ce5..209e7af 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out @@ -89,6 +89,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out index 8f03384..19faf2a 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out @@ -261,6 +261,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -417,6 +419,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -561,6 +565,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 5:double native: false + 
nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint), cdouble (type: double) @@ -753,6 +759,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index da2fbe7..a41a8d3 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -169,10 +169,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -193,7 +193,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -379,10 +379,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -403,7 +403,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -785,10 +785,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -809,7 +809,7 @@ STAGE PLANS: inputFormatFeatureSupport: 
[DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1138,11 +1138,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 7:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1166,7 +1166,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1278,11 +1278,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 8:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1306,7 +1306,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -1418,11 +1418,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1446,7 +1446,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1558,11 +1558,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true 
vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1586,7 +1586,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 8673f37..5c1ba6a 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -152,6 +152,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index c496dce..d8f7280 100644 --- ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1328,10 +1328,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 16:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ws_order_number (type: int) @@ -1354,7 +1355,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1381,10 +1382,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_data_types.q.out ql/src/test/results/clientpositive/spark/vector_data_types.q.out index a490a56..6c9448e 100644 --- ql/src/test/results/clientpositive/spark/vector_data_types.q.out +++ ql/src/test/results/clientpositive/spark/vector_data_types.q.out @@ -386,6 +386,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS 
true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index 5c0ead3..f9ba849 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -91,6 +91,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -273,6 +275,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -489,6 +493,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -690,6 +696,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out index 4a6311b..99262c5 100644 --- ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out @@ -143,10 +143,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) @@ -169,7 +170,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: 
[DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out index 4df04a7..439e5a8 100644 --- ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out @@ -149,6 +149,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index db9aee8..dcfd823 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -203,12 +203,12 @@ PREHOOK: query: select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1a PREHOOK: Input: default@orc_table_2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 PREHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 @@ -262,10 +262,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -286,7 +287,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -385,12 +386,12 @@ PREHOOK: query: select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1a PREHOOK: Input: default@orc_table_2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 PREHOOK: query: CREATE TABLE orc_table_1b(v1 STRING, a INT) STORED AS ORC PREHOOK: type: 
CREATETABLE @@ -599,12 +600,12 @@ PREHOOK: query: select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three 3 PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -770,12 +771,12 @@ PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_ta PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three 3 3 THREE PREHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -950,12 +951,12 @@ PREHOOK: query: select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join or PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three 6 15 THREE PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -1129,12 +1130,12 @@ PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three THREE 3 PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -1308,12 +1309,12 @@ PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 three THREE PREHOOK: 
query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 @@ -1487,12 +1488,12 @@ PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three THREE 3 PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 @@ -1666,10 +1667,10 @@ PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 three THREE diff --git ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index 0f56656..ad188e3 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -135,7 +135,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 9f80711..1d59e0a 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -95,10 +95,11 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -119,7 +120,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: @@ -148,10 +149,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator 
groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -174,7 +176,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -378,10 +380,11 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 17:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) @@ -402,7 +405,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: @@ -431,10 +434,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -457,7 +461,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index d7a21f7..278efd9 100644 --- ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -150,6 +150,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out index e6964e1..17a3a7b 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out +++ 
ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out @@ -37,11 +37,11 @@ POSTHOOK: Lineage: orc_table_2.v2 SCRIPT [] PREHOOK: query: select * from orc_table_1 PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from orc_table_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL one 1 @@ -51,11 +51,11 @@ two 2 PREHOOK: query: select * from orc_table_2 PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from orc_table_2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 0 ZERO 2 TWO 3 THREE @@ -210,12 +210,12 @@ PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer j PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1 PREHOOK: Input: default@orc_table_2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1 POSTHOOK: Input: default@orc_table_2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL NULL NULL NULL NULL one 1 NULL NULL @@ -370,12 +370,12 @@ PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1 PREHOOK: Input: default@orc_table_2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1 POSTHOOK: Input: default@orc_table_2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL 0 ZERO NULL NULL 4 FOUR NULL NULL NULL diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index 188f157..443b693 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -89,11 +89,11 @@ POSTHOOK: Lineage: small_alltypesorc4a.ctinyint SIMPLE [] PREHOOK: query: select * from small_alltypesorc1a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL @@ -102,11 +102,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc2a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * 
from small_alltypesorc2a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -115,24 +115,33 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a -POSTHOOK: Output: hdfs://### HDFS PATH ### -NULL -16306 384405526 -1645852809 NULL -16306.0 b5SoK8 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:11.105 true false -NULL -16307 559926362 -1645852809 NULL -16307.0 nA8bdtWfPPQyP2hL5 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.072 false false -NULL -16309 -826497289 -1645852809 NULL -16309.0 54o058c3mK6ewOQ5 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:10.761 false false -NULL -16310 206154150 1864027286 NULL -16310.0 5Hy1y6 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:00.821 false true -NULL -16379 -894716315 1864027286 NULL -16379.0 2ArdYqML3654nUjGJk3 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:47.059 true true +#### A masked pattern was here #### +NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false +NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false +NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false +NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false +NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
PREHOOK: query: select * from small_alltypesorc4a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_alltypesorc_a stored as orc as select * from (select * from (select * from small_alltypesorc1a) sq1 union all @@ -187,20 +196,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_alltypesorc_a PREHOOK: Output: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_alltypesorc_a POSTHOOK: Output: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL @@ -370,14 +379,14 @@ left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -8080 528534767 NULL -64.0 -8080.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 true NULL @@ -549,14 +558,14 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here ####
-64 -64 -64 @@ -848,6 +857,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -934,7 +945,7 @@ left outer join small_alltypesorc_a hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -944,5 +955,5 @@ left outer join small_alltypesorc_a hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 145 -8960 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index d0043b6..5609ff7 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -89,24 +89,33 @@ POSTHOOK: Lineage: small_alltypesorc4a_n0.ctinyint SIMPLE [(alltypesorc)alltypes PREHOOK: query: select * from small_alltypesorc1a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### --51 NULL -1064981602 -1444011153 -51.0 NULL aY3tpnr6wfvmWMG0U881 2Ol4N3Ha0815Ej54lA2N 1969-12-31 16:00:08.451 NULL false false --51 NULL -1065775394 -1331703092 -51.0 NULL aD88uS2N8DmqPlvjOa7F46i7 Ut8ka2o8iokF504065PYS 1969-12-31 16:00:08.451 NULL false true --51 NULL -1066684273 2034191923 -51.0 NULL 2W4Kg220OcCy065HG60k6e D7GOQhc3qbAR6 1969-12-31 16:00:08.451 NULL false false --51 NULL -1067683781 1750003656 -51.0 NULL IbgbUvP5 47x2I874 1969-12-31 16:00:08.451 NULL false true --51 NULL -1071480828 -1401575336 -51.0 NULL aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA 1969-12-31 16:00:08.451 NULL true true +#### A masked pattern was here #### +NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false +NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false +NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false +NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false +NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
PREHOOK: query: select * from small_alltypesorc2a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -115,24 +124,33 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### --64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL --64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL --64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL --64 -8080 528534767 NULL -64.0 -8080.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 true NULL --64 -9842 253665376 NULL -64.0 -9842.0 1cGVWH7n1QU NULL 1969-12-31 16:00:00.631 1969-12-31 16:00:01.781 true NULL +#### A masked pattern was here #### +NULL -13166 626923679 NULL NULL -13166.0 821UdmGbkEf4j NULL 1969-12-31 15:59:55.089 1969-12-31 16:00:15.69 true NULL +NULL -14426 626923679 NULL NULL -14426.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.505 1969-12-31 16:00:13.309 true NULL +NULL -14847 626923679 NULL NULL -14847.0 821UdmGbkEf4j NULL 1969-12-31 16:00:00.612 1969-12-31 15:59:43.704 true NULL +NULL -15632 528534767 NULL NULL -15632.0 cvLH6Eat2yFsyy7p NULL NULL 1969-12-31 15:59:53.593 true NULL +NULL -15830 253665376 NULL NULL -15830.0 1cGVWH7n1QU NULL 1969-12-31 16:00:02.582 1969-12-31 16:00:00.518 true NULL
PREHOOK: query: select * from small_alltypesorc4a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -60 -200 NULL NULL -60.0 -200.0 NULL NULL 1969-12-31 16:00:11.996 1969-12-31 15:59:55.451 NULL NULL -61 -7196 NULL NULL -61.0 -7196.0 NULL 8Mlns2Tl6E0g 1969-12-31 15:59:44.823 1969-12-31 15:59:58.174 NULL false -61 -7196 NULL NULL -61.0 -7196.0 NULL fUJIN 1969-12-31 16:00:11.842 1969-12-31 15:59:58.174 NULL false @@ -192,25 +210,29 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_a_n0 COMPUTE STATISTICS FOR COLU PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_alltypesorc_a_n0 PREHOOK: Output: default@small_alltypesorc_a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a_n0 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_alltypesorc_a_n0 POSTHOOK: Output: default@small_alltypesorc_a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### --51 NULL -1064981602 -1444011153 -51.0 NULL aY3tpnr6wfvmWMG0U881 2Ol4N3Ha0815Ej54lA2N 1969-12-31 16:00:08.451 NULL false false --51 NULL -1065775394 -1331703092 -51.0 NULL aD88uS2N8DmqPlvjOa7F46i7 Ut8ka2o8iokF504065PYS 1969-12-31 16:00:08.451 NULL false true --51 NULL -1066684273 2034191923 -51.0 NULL 2W4Kg220OcCy065HG60k6e D7GOQhc3qbAR6 1969-12-31 16:00:08.451 NULL false false --51 NULL -1067683781 1750003656 -51.0 NULL IbgbUvP5 47x2I874 1969-12-31 16:00:08.451 NULL false true --51 NULL -1071480828 -1401575336 -51.0 NULL aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA 1969-12-31 16:00:08.451 NULL true true +#### A masked pattern was here ####
-60 -200 NULL NULL -60.0 -200.0 NULL NULL 1969-12-31 16:00:11.996 1969-12-31 15:59:55.451 NULL NULL -61 -7196 NULL NULL -61.0 -7196.0 NULL 8Mlns2Tl6E0g 1969-12-31 15:59:44.823 1969-12-31 15:59:58.174 NULL false -61 -7196 NULL NULL -61.0 -7196.0 NULL fUJIN 1969-12-31 16:00:11.842 1969-12-31 15:59:58.174 NULL false @@ -412,6 +434,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -498,7 +522,7 @@ left outer join small_alltypesorc_a_n0 hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a_n0 c left outer join small_alltypesorc_a_n0 cd @@ -508,5 +532,10 @@ left outer join small_alltypesorc_a_n0 hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### -24 -3110813706 +#### A masked pattern was here #### +34 -26289186744 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out index ae67597..31daa68 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out @@ -89,24 +89,33 @@ POSTHOOK: Lineage: small_alltypesorc4a_n1.ctinyint SIMPLE [(alltypesorc)alltypes PREHOOK: query: select * from small_alltypesorc1a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### --64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL --64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL --64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL --64 -8080 528534767 NULL -64.0 -8080.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 true NULL --64 -9842 253665376 NULL -64.0 -9842.0 1cGVWH7n1QU NULL 1969-12-31 16:00:00.631 1969-12-31 16:00:01.781 true NULL +#### A masked pattern was here #### +NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false +NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false +NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false +NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false +NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false
PREHOOK: query: select * from small_alltypesorc2a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -51 NULL NULL -1731061911 -51.0 NULL Pw53BBJ yL443x2437PO5Hv1U3lCjq2D 1969-12-31 16:00:08.451 NULL true false -51 NULL NULL -1846191223 -51.0 NULL Ul085f84S33Xd32u x1JC58g0Ukp 1969-12-31 16:00:08.451 NULL true true -51 NULL NULL -1874052220 -51.0 NULL c61B47I604gymFJ sjWQS78 1969-12-31 16:00:08.451 NULL false false @@ -115,11 +124,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -51 NULL -31312632 1086455747 -51.0 NULL NULL Bc7xt12568c451o64LF5 1969-12-31 16:00:08.451 NULL NULL true -51 NULL -337975743 608681041 -51.0 NULL NULL Ih2r28o6 1969-12-31 16:00:08.451 NULL NULL true -51 NULL -413196097 -306198070 -51.0 NULL NULL F53QcSDPpxYF1Ub 1969-12-31 16:00:08.451 NULL NULL false @@ -128,11 +137,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc4a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -192,20 +201,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_a_n1 COMPUTE STATISTICS FOR COLU PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_alltypesorc_a_n1 PREHOOK: Output: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a_n1 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_alltypesorc_a_n1 POSTHOOK: Output: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -51 NULL
-413196097 -306198070 -51.0 NULL NULL F53QcSDPpxYF1Ub 1969-12-31 16:00:08.451 NULL NULL false @@ -258,7 +267,7 @@ left outer join small_alltypesorc_a_n1 hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -268,8 +277,13 @@ left outer join small_alltypesorc_a_n1 hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### -32 +#### A masked pattern was here #### +20 PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c @@ -302,7 +316,7 @@ left outer join small_alltypesorc_a_n1 hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -312,8 +326,13 @@ left outer join small_alltypesorc_a_n1 hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### -24 +#### A masked pattern was here #### +28 PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c @@ -346,7 +365,7 @@ left outer join small_alltypesorc_a_n1 hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -356,5 +375,10 @@ left outer join small_alltypesorc_a_n1 hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### -24 +#### A masked pattern was here #### +28
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out index f63af39..ce4659d 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out @@ -89,11 +89,11 @@ POSTHOOK: Lineage: small_alltypesorc4b.ctinyint SIMPLE [] PREHOOK: query: select * from small_alltypesorc1b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -3097 253665376 NULL -64.0 -3097.0 1cGVWH7n1QU NULL 1969-12-31 16:00:00.013 1969-12-31 16:00:06.097 true NULL @@ -107,11 +107,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc2b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -200 NULL -1809444706 -64.0 -200.0 NULL B87YVb3UASqg 1969-12-31 16:00:10.858 1969-12-31 15:59:55.451 NULL true -64 -200 NULL 2118653994 -64.0 -200.0 NULL ONHGSDy1U4Ft431DfQp15 1969-12-31 16:00:03.944 1969-12-31 15:59:55.451 NULL true -64 -200 NULL 927647669 -64.0 -200.0 NULL DhxkBT 1969-12-31 16:00:00.199 1969-12-31 15:59:55.451 NULL false @@ -125,10 +125,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -NULL -16269 -378213344 -1645852809 NULL -16269.0 sOdj1Tmvbl03f xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:15.867 false false -NULL -16274 -671342269 -1645852809 NULL -16274.0 3DE7EQo4KyT0hS xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:51.469 false false @@ -140,14 +141,27 @@ NULL -16307 559926362 -1645852809 NULL -16307.0 nA8bdtWfPPQyP2hL5 xH7445Rals48VO -NULL -16309 -826497289 -1645852809 NULL -16309.0 54o058c3mK6ewOQ5 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:10.761 false false -NULL -16310 206154150 1864027286 NULL -16310.0 5Hy1y6 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:00.821 false true -NULL -16379 -894716315 1864027286 NULL -16379.0 2ArdYqML3654nUjGJk3 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:47.059 true true +NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false +NULL NULL -609074876 -1887561756 NULL NULL EcM71 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:55.061 true false +NULL NULL -700300206 -1887561756 NULL NULL kdqQE010 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:58.384 false false +NULL NULL -726473298 1864027286 NULL NULL
OFy1a1xf37f75b5N 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:11.799 true true +NULL NULL -738747840 -1645852809 NULL NULL vmAT10eeE47fgH20pLi xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:11.55 true false +NULL NULL -838810013 1864027286 NULL NULL N016jPED08o 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:44.252 false true +NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false +NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false +NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false +NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false PREHOOK: query: select * from small_alltypesorc4b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_alltypesorc_b stored as orc as select * from (select * from (select * from small_alltypesorc1b) sq1 union all @@ -202,20 +216,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_alltypesorc_b PREHOOK: Output: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_alltypesorc_b POSTHOOK: Output: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -200 NULL -1809444706 -64.0 -200.0 NULL B87YVb3UASqg 1969-12-31 16:00:10.858 1969-12-31 15:59:55.451 NULL true @@ -269,14 +283,14 @@ left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -3586 626923679 NULL
-64.0 -3586.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.952 1969-12-31 15:59:51.131 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -4018 626923679 NULL -64.0 -4018.0 821UdmGbkEf4j NULL 1969-12-31 15:59:58.959 1969-12-31 16:00:07.803 true NULL @@ -354,14 +368,14 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -64 -64 @@ -804,7 +818,7 @@ left outer join small_alltypesorc_b hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -814,5 +828,5 @@ left outer join small_alltypesorc_b hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 890 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out index 98732ed..7db2bd8 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out @@ -28,12 +28,12 @@ PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@sorted_mod_4 PREHOOK: Output: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@sorted_mod_4 POSTHOOK: Output: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_table stored as orc as select ctinyint, cbigint from alltypesorc limit 100 PREHOOK: type: CREATETABLE_AS_SELECT @@ -60,12 +60,12 @@ PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_table PREHOOK: Output: default@small_table -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_table POSTHOOK: Output: default@small_table -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from sorted_mod_4 s @@ -95,7 +95,7 @@ on s.ctinyint = st.ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st @@ -104,7 +104,7 @@ on s.ctinyint = st.ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS 
PATH ### +#### A masked pattern was here #### 6876 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint @@ -135,7 +135,7 @@ on s.ctinyint = sm.ctinyint and s.cmodint = 2 PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -144,7 +144,7 @@ on s.ctinyint = sm.ctinyint and s.cmodint = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6058 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint @@ -175,7 +175,7 @@ on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -184,7 +184,7 @@ on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6248 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint @@ -215,7 +215,7 @@ on s.ctinyint = sm.ctinyint and s.ctinyint < 100 PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -224,7 +224,7 @@ on s.ctinyint = sm.ctinyint and s.ctinyint < 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6876 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* @@ -261,7 +261,7 @@ left outer join sorted_mod_4 s2 PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -272,7 +272,7 @@ left outer join sorted_mod_4 s2 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3268334 PREHOOK: query: create table mod_8_mod_4 stored as orc as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc @@ -302,12 +302,12 @@ PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Output: default@mod_8_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: 
Output: default@mod_8_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_table2 stored as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100 PREHOOK: type: CREATETABLE_AS_SELECT @@ -334,12 +334,12 @@ PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_table2 PREHOOK: Output: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_table2 POSTHOOK: Output: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s @@ -369,7 +369,7 @@ on s.cmodtinyint = st.cmodtinyint PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -378,7 +378,7 @@ on s.cmodtinyint = st.cmodtinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 39112 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint @@ -409,7 +409,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -418,7 +418,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 11171 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint @@ -449,7 +449,7 @@ on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -458,7 +458,7 @@ on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 14371 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint @@ -489,7 +489,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer 
join small_table2 sm @@ -498,7 +498,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 17792 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* @@ -535,7 +535,7 @@ left outer join mod_8_mod_4 s2 PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -546,5 +546,5 @@ left outer join mod_8_mod_4 s2 POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6524438 diff --git ql/src/test/results/clientpositive/spark/vector_string_concat.q.out ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index 426b81a..a0cdef7 100644 --- ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -360,10 +360,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 20:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -387,7 +388,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index c4626e4..e09e521 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -57,6 +57,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -239,6 +241,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -572,6 +576,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -754,6 +760,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1087,6 +1095,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1269,6 +1279,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1649,6 +1661,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_1.q.out ql/src/test/results/clientpositive/spark/vectorization_1.q.out index 1bf314a..7a78d40 100644 --- ql/src/test/results/clientpositive/spark/vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_1.q.out @@ -91,6 +91,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_12.q.out ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 1643824..28e7f98 100644 --- ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -115,6 +115,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or 
Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index 34ec9c4..ffab121 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -117,6 +117,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -473,6 +475,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 780be9c..37480b5 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -117,6 +117,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 0ba15ee..5ecb3ad 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -113,6 +113,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), 
_col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index cb7da29..7391edf 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -90,6 +90,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_2.q.out ql/src/test/results/clientpositive/spark/vectorization_2.q.out index 7acdce1..54e027c 100644 --- ql/src/test/results/clientpositive/spark/vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_2.q.out @@ -95,6 +95,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_3.q.out ql/src/test/results/clientpositive/spark/vectorization_3.q.out index 1e78ef5..6e51fa3 100644 --- ql/src/test/results/clientpositive/spark/vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_3.q.out @@ -100,6 +100,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_4.q.out ql/src/test/results/clientpositive/spark/vectorization_4.q.out index 08d3b0b..460c7fd 100644 --- ql/src/test/results/clientpositive/spark/vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_4.q.out @@ -95,6 +95,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_5.q.out ql/src/test/results/clientpositive/spark/vectorization_5.q.out index 82973c6..38730e7 100644 --- ql/src/test/results/clientpositive/spark/vectorization_5.q.out +++ 
ql/src/test/results/clientpositive/spark/vectorization_5.q.out @@ -88,6 +88,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index cb7da29..7391edf 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -90,6 +90,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out index 917d130..bfd2154 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -47,6 +47,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out index 44667a5..e929650 100644 --- ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out @@ -240,7 +240,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -333,7 +333,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -543,7 +543,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -633,7 +633,7 @@ STAGE PLANS: enabled: true inputFormatFeatureSupport: [] featureSupportInUse: [] - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out 
ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 231dea6..617e846 100644 --- ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -122,6 +122,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -387,6 +389,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] mode: hash @@ -644,6 +648,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash @@ -880,6 +886,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash @@ -2230,6 +2238,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:smallint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col0 (type: smallint) @@ -2510,6 +2520,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: _col0 (type: double) @@ -2834,6 +2846,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:timestamp, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: timestamp), _col1 (type: string) @@ -3239,6 +3253,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: boolean) @@ -3475,10 +3491,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3499,7 +3515,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3592,10 +3608,10 @@ STAGE PLANS: Group By Operator aggregations: count(i) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3616,7 +3632,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3781,10 +3797,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3805,7 +3821,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3898,10 +3914,10 @@ STAGE PLANS: Group By Operator aggregations: count(ctinyint) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:tinyint) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true 
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3922,7 +3938,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4015,10 +4031,10 @@ STAGE PLANS: Group By Operator aggregations: count(cint) Group By Vectorization: - aggregators: VectorUDAFCount(col 2:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4039,7 +4055,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4132,10 +4148,10 @@ STAGE PLANS: Group By Operator aggregations: count(cfloat) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:float) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4156,7 +4172,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4249,10 +4265,10 @@ STAGE PLANS: Group By Operator aggregations: count(cstring1) Group By Vectorization: - aggregators: VectorUDAFCount(col 6:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4273,7 +4289,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4366,10 +4382,10 @@ STAGE PLANS: Group By Operator aggregations: count(cboolean1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:boolean) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + 
native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4390,7 +4406,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 740f83a..68088c7 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -127,6 +127,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index dae07eb..d0bfd58 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -3764,6 +3764,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) diff --git ql/src/test/results/clientpositive/vector_aggregate_9.q.out ql/src/test/results/clientpositive/vector_aggregate_9.q.out index 9443395..dec4fa6 100644 --- ql/src/test/results/clientpositive/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_9.q.out @@ -144,6 +144,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -254,6 +256,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -364,6 +368,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out index d5c3c0a..72347a0 100644 --- ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out @@ -84,6 +84,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index 96caaee..24d2572 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -199,6 +199,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -353,6 +355,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:binary native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bin (type: binary) diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out index 34028c0..e2be288 100644 --- ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -147,6 +147,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out index 010eacc..f617f79 100644 --- ql/src/test/results/clientpositive/vector_char_2.q.out +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -108,6 +108,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction 
IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) @@ -300,6 +302,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out index ed1a076..45b13ac 100644 --- ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -279,6 +281,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_data_types.q.out ql/src/test/results/clientpositive/vector_data_types.q.out index e76d499..10ab5a4 100644 --- ql/src/test/results/clientpositive/vector_data_types.q.out +++ ql/src/test/results/clientpositive/vector_data_types.q.out @@ -357,6 +357,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index 2360ed6..10e971c 100644 --- ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -232,6 +234,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or 
Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -411,6 +415,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -576,6 +582,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/vector_decimal_precision.q.out ql/src/test/results/clientpositive/vector_decimal_precision.q.out index dbd9e35..6a12e8e 100644 --- ql/src/test/results/clientpositive/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/vector_decimal_precision.q.out @@ -590,6 +590,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -1179,6 +1181,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_delete_orig_table.q.out ql/src/test/results/clientpositive/vector_delete_orig_table.q.out index dee175a..abd52ce 100644 --- ql/src/test/results/clientpositive/vector_delete_orig_table.q.out +++ ql/src/test/results/clientpositive/vector_delete_orig_table.q.out @@ -73,6 +73,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_distinct_2.q.out ql/src/test/results/clientpositive/vector_distinct_2.q.out index 1926262..b61d8ec 100644 --- ql/src/test/results/clientpositive/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/vector_distinct_2.q.out @@ -142,6 +142,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction 
IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out index aac72ee..c419b91 100644 --- ql/src/test/results/clientpositive/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -47,6 +47,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -194,6 +196,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -349,6 +353,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -504,6 +510,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) diff --git ql/src/test/results/clientpositive/vector_groupby_3.q.out ql/src/test/results/clientpositive/vector_groupby_3.q.out index 9e527ee..16fcd04 100644 --- ql/src/test/results/clientpositive/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/vector_groupby_3.q.out @@ -144,6 +144,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 70cdd7a..a88aa08 100644 --- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -57,6 +57,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -340,6 +342,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_multikey.q.out ql/src/test/results/clientpositive/vector_groupby_multikey.q.out new file mode 100644 index 0000000..d8d602a --- /dev/null +++ ql/src/test/results/clientpositive/vector_groupby_multikey.q.out @@ -0,0 +1,2351 @@ +PREHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a.key1 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 EXPRESSION [] +POSTHOOK: Lineage: groupby_multi_1a.key1 EXPRESSION [] +PREHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a 
+POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: Lineage: groupby_multi_1a_nonull.key0 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a_nonull.key1 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: explain vectorization operator +select key0, key1, count(*) from 
groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +NULL NULL 2 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator 
+ groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +NULL NULL +PREHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 
+1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_multi_1a_nonull + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: 
select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo 
order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string), ts (type: timestamp) + outputColumnNames: bo, s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: 
boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string) + outputColumnNames: bo, s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + 
Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), d (type: double), ts (type: timestamp) + outputColumnNames: si, d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No 
PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor 
Tree: + ListSink + +PREHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), ts (type: timestamp) + outputColumnNames: si, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select `dec`, 
bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: f (type: float), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: f, dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(f) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + 
native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: si, i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(si) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 
(type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 +65536 4294967307 +65536 4294967308 +65536 4294967312 +65536 4294967317 +65536 4294967320 +65536 4294967326 +65536 4294967334 +65536 4294967336 +65536 4294967338 diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index dfc4ea5..7b8db7d 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -270,6 +270,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -466,6 +468,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -747,6 +751,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_item_sk (type: int) @@ -949,6 +955,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) diff --git ql/src/test/results/clientpositive/vector_groupby_singlekey.q.out ql/src/test/results/clientpositive/vector_groupby_singlekey.q.out new file mode 100644 index 0000000..3ef66a4 --- /dev/null +++ ql/src/test/results/clientpositive/vector_groupby_singlekey.q.out @@ -0,0 +1,11313 @@ +PREHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SIMPLE [(groupby_long_1a_txt)groupby_long_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table 
+POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SIMPLE [(groupby_long_1a_nonull_txt)groupby_long_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a_nonull values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@groupby_long_1b_txt +PREHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SIMPLE [(groupby_long_1b_txt)groupby_long_1b_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1b values (32030) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (32030) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SIMPLE [(groupby_long_1b_nonull_txt)groupby_long_1b_nonull_txt.FieldSchema(name:key, 
type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b_nonull values (31713) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (31713) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b_nonull values (34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c.key SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] 
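-- A minimal sketch (not one of the recorded queries below): the singlekey
-- tests that follow exercise both COUNT variants, and the golden results
-- differ only on the NULL key group. COUNT(key) skips NULL inputs, so the
-- NULL group reports 0; COUNT(*) counts rows, so for groupby_long_1a it
-- reports 2 (one \N row loaded from groupby_long_1a.txt plus the NULL
-- inserted above). Assuming the same table, one query shows both at once:
SELECT key, COUNT(key), COUNT(*)
FROM groupby_long_1a
GROUP BY key;
-- expected row for the NULL group:  NULL   0   2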
+PREHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: Lineage: groupby_long_1c_nonull.b_string SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c_nonull.key SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY 
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 
+1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 48 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num 
rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +NULL +PREHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a_nonull + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: 
bigint) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +1569543799237464101 +3313583664488247651 +968819023021777205 +PREHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +3313583664488247651 +968819023021777205 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE 
Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +NULL +PREHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + 
enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b_nonull + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + 
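-- [Editorial sketch, not part of the golden .q.out] The count() aggregation
-- above counts rows, whereas the count(key) plan before it counts only
-- non-NULL keys; that is why the NULL group reports 0 for count(key) above
-- and 4 for count(*) in the results below. Both side by side, reusing the
-- groupby_long_1c table loaded earlier in this patch:
select key, count(key) as cnt_key, count(*) as cnt_rows
from groupby_long_1c
group by key;
-- The filtered variants also show that a predicate such as
-- key != -1437463633 evaluates to NULL (not true) for NULL keys, so the
-- NULL group drops out of every "where key != ..." result in this file.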
Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
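-- [Editorial sketch, not part of the golden .q.out] The plan opening above
-- is for count(b_string): counting a non-key column tallies only its
-- non-NULL values within each group, so in the results further below group
-- -1437463633 holds 5 rows yet reports 4, and the NULL-key group reports
-- the 3 non-NULL strings it contains.
select key, count(b_string)
from groupby_long_1c
group by key;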
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: key, b_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b_string) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +9999 1 +NULL 3 +PREHOOK: query: select key, 
count(b_string) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +9999 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + 
Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +NULL +PREHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull 
group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c_nonull + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +PREHOOK: query: select key from groupby_long_1c_nonull where 
key != -1437463633 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 +1928928239 +PREHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: insert into groupby_decimal64_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1a values (333.33) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (333.33) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@groupby_decimal64_1a_nonull +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp EXPRESSION [] +POSTHOOK: Lineage: groupby_decimal64_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key 
decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 
1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,3)/DECIMAL_64) -> 2:decimal(6,3) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: 
[decimal(6,3)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(6,3)] + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +NULL +PREHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked 
pattern was here #### +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a_nonull + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + 
groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,3)/DECIMAL_64) -> 2:decimal(6,3) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,3)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(6,3)] + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(6,3) + partitionColumnCount: 0 + 
scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1:decimal(8,2)/DECIMAL_64) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + 
vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key 
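A side note in sketch form (not part of the recorded golden output): every native GroupBy check in these plans fails on the same single condition, hive.execution.engine mr IN [tez, spark] IS false, because this qtest runs on MR. Assuming a Tez-enabled session, the settings named in the nativeConditionsMet lines above suggest how the same plan could be re-checked with the native path fully eligible:

set hive.execution.engine=tez;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization detail
select key, count(*) from groupby_decimal64_1b group by key;
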
+PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: c_timestamp (type: timestamp), key (type: decimal(8,2)) + outputColumnNames: c_timestamp, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_timestamp) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:timestamp) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: 
false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: 
Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(8,2)] + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(8,2)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +NULL +PREHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 
2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b_nonull + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: 
true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(8,2)] + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column 
stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(8,2)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: 
default@groupby_string_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SIMPLE [(groupby_string_1a_txt)groupby_string_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SIMPLE [(groupby_string_1a_nonull_txt)groupby_string_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a_nonull values 
('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b +POSTHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b +POSTHOOK: Lineage: groupby_string_1b.key SIMPLE [(groupby_string_1b_txt)groupby_string_1b_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: 
Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SIMPLE [(groupby_string_1b_nonull_txt)groupby_string_1b_nonull_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1b_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO 
TABLE groupby_string_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_date SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') 
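A usage sketch (not part of the recorded golden output): the single-row inserts in this block populate groupby_string_1c with a spread of NULL and non-NULL combinations across key, s_date and s_timestamp, so that grouped counts exercise both the NULL key slot and the skipping of NULL values inside the hash aggregation. A query of the kind this setup supports, mirroring the count(c_timestamp) pattern used for the decimal64 tables earlier:

select key, count(s_timestamp)
from groupby_string_1c
group by key;
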
+PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### 
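A sanity-check sketch (not part of the recorded golden output): each *_nonull table repeats its base dataset with the key column kept free of NULLs, so the same grouped counts can be compared with and without a NULL key group. For the table loaded here, that property could be spot-checked with:

select count(*) from groupby_string_1c_nonull where key is null;
-- expected: 0 for the _nonull variants
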
+PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into 
groupby_string_1c_nonull values ('EEE', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 0 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By 
Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 3 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: 
Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +NULL +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +QNCYBDW +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 
+UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a_nonull + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +MXGDMBD +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 0 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + 
Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +MXGD +NULL +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b_nonull + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +MXGD +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + 
native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 0 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) 
from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 6 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date) + outputColumnNames: key, s_date + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_date) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 5 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_timestamp (type: timestamp) + outputColumnNames: key, s_timestamp + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: count(s_timestamp) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 4 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 
1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +NULL +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was 
here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 
1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c_nonull + Statistics: Num rows: 41 Data size: 7144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 7144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 41 Data size: 7144 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + 
Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 20 Data size: 3484 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 3484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated 
by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a +POSTHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a +POSTHOOK: Lineage: groupby_serialize_1a.key SIMPLE [(groupby_serialize_1a_txt)groupby_serialize_1a_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: Lineage: groupby_serialize_1a_nonull.key SIMPLE [(groupby_serialize_1a_nonull_txt)groupby_serialize_1a_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b +POSTHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b +POSTHOOK: Lineage: groupby_serialize_1b.c_double SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_smallint SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_string SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.key SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: query: CREATE TABLE 
groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_double SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_smallint SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_string SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.key SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine 
mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by 
key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +NULL +PREHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern 
was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a_nonull + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + 
sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### 
+POSTHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: 
[DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 
+2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map 
Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_smallint (type: smallint) + outputColumnNames: key, c_smallint + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_smallint) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 
06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_string (type: string) + outputColumnNames: key, c_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_string) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping 
Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 
+PREHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: 
COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 
+2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +NULL +PREHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 
03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from 
groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked 
pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b_nonull + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 
06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k 
+PREHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string), ts (type: timestamp) + outputColumnNames: s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group 
By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number 
of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by 
ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: d (type: double), ts (type: timestamp) + outputColumnNames: d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num 
rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 
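The ts groupings return identical counts for count(ts), count(d), and count(*), which holds only because neither ts nor d is NULL for these rows: count(col) skips NULLs, while count(*) counts every row. An illustrative query (hypothetical, not part of this test) that would surface any divergence between the two:

    select ts, count(d) as non_null_d, count(*) as all_rows
    from over10k
    group by ts
    having count(d) <> count(*)
    order by ts
    limit 10;
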
+2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(dec) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key 
expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(bin) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), 
VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY 
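The decimal column is written with backticks as `dec` in these queries, presumably because dec collides with the DECIMAL type keyword in Hive's grammar and would not parse unquoted. A hypothetical rewrite using a select alias shows the quoting only matters at the column reference sites:

    select `dec` as dec_col, count(*) as cnt
    from over10k
    group by `dec`
    order by dec_col
    limit 10;
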
+POSTHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(i) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: 
KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: 
NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 +65537 +65538 +65539 +65540 +65541 +65542 +65543 +65544 +65545 diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out index 5a7227d..0729324 100644 --- ql/src/test/results/clientpositive/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out @@ -168,6 +168,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: s_store_id (type: string), 0L (type: bigint) @@ -283,6 +285,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/vector_include_no_sel.q.out ql/src/test/results/clientpositive/vector_include_no_sel.q.out index 61fa6df..b2c2a3b 100644 --- ql/src/test/results/clientpositive/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/vector_include_no_sel.q.out @@ -247,6 +247,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_orderby_5.q.out ql/src/test/results/clientpositive/vector_orderby_5.q.out index 8dc7143..27e0904 100644 --- 
ql/src/test/results/clientpositive/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/vector_orderby_5.q.out @@ -145,6 +145,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out index 3d2cd4a..6b2efcc 100644 --- ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -715,6 +715,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out index f949655..f68ef62 100644 --- ql/src/test/results/clientpositive/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_outer_join2.q.out @@ -347,6 +347,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_outer_join3.q.out ql/src/test/results/clientpositive/vector_outer_join3.q.out index 205f2e8..740121e 100644 --- ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -246,9 +246,9 @@ left outer join small_alltypesorc_a_n1 hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 #### A masked pattern was here ####
more {"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS 
true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4493 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 4942 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output 
Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +======= +{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 
Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 
6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +>>>>>>> 1a04fe1... more PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -290,9 +297,16 @@ left outer join small_alltypesorc_a_n1 hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY +<<<<<<< HEAD POSTHOOK: Input: default@small_alltypesorc_a_n1 #### A masked pattern was here #### +======= +<<<<<<< HEAD +>>>>>>> eb011a9... more {"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: 
string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4493 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 4942 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink 
Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data 
size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +>>>>>>> 1a04fe1... more PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -334,9 +348,16 @@ left outer join small_alltypesorc_a_n1 hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY +<<<<<<< HEAD POSTHOOK: Input: default@small_alltypesorc_a_n1 #### A masked pattern was here #### +======= +<<<<<<< HEAD +>>>>>>> eb011a9... 
more {"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cbigint`, `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cbigint`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2` AND `t`.`cbigint` = `t0`.`cbigint`\nLEFT JOIN (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1` AND `t`.`cint` = `t1`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: 
COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4493 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 4942 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 
7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +======= +{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cbigint`, `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cbigint`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2` AND `t`.`cbigint` = `t0`.`cbigint`\nLEFT JOIN (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1` AND `t`.`cint` = `t1`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping 
Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +>>>>>>> 1a04fe1... more PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index 1de49dd..e5a87ca 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -792,9 +792,16 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY +<<<<<<< HEAD POSTHOOK: Input: default@small_alltypesorc_b #### A masked pattern was here #### +======= +<<<<<<< HEAD +>>>>>>> eb011a9... 
more {"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `ctinyint`, `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t1` ON `t`.`ctinyint` = `t1`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6800 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6800 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6800 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6800 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6800 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6800 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS 
true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7480 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8228 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File 
Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +======= +{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `ctinyint`, `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t1` ON `t`.`ctinyint` = `t1`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic 
stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 
2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +>>>>>>> 1a04fe1... more PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out index 1482966..fae5282 100644 --- ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out +++ ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out @@ -104,6 +104,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -255,6 +257,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out index 886684a..78f78cf 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out @@ -66,6 +66,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) diff --git 
ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out index 23ac4d2..9d29e0b 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out @@ -105,6 +105,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/vector_string_concat.q.out ql/src/test/results/clientpositive/vector_string_concat.q.out index 2e30fc4..e9b2151 100644 --- ql/src/test/results/clientpositive/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/vector_string_concat.q.out @@ -356,6 +356,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 20:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_topnkey.q.out ql/src/test/results/clientpositive/vector_topnkey.q.out index e370489..817b40c 100644 --- ql/src/test/results/clientpositive/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/vector_topnkey.q.out @@ -44,6 +44,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -212,6 +214,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out index 6b374f1..0f15dd9 100644 --- ql/src/test/results/clientpositive/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/vector_when_case_null.q.out @@ -60,6 +60,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/vectorization_1.q.out 
ql/src/test/results/clientpositive/vectorization_1.q.out index af6f03d..6f0a07f 100644 --- ql/src/test/results/clientpositive/vectorization_1.q.out +++ ql/src/test/results/clientpositive/vectorization_1.q.out @@ -86,6 +86,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_12.q.out ql/src/test/results/clientpositive/vectorization_12.q.out index 0a93463..7aa2bea 100644 --- ql/src/test/results/clientpositive/vectorization_12.q.out +++ ql/src/test/results/clientpositive/vectorization_12.q.out @@ -110,6 +110,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/vectorization_13.q.out ql/src/test/results/clientpositive/vectorization_13.q.out index 8897f84..f5f3eac 100644 --- ql/src/test/results/clientpositive/vectorization_13.q.out +++ ql/src/test/results/clientpositive/vectorization_13.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -447,6 +449,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out index 88e41cc..7e8e9db 100644 --- ql/src/test/results/clientpositive/vectorization_14.q.out +++ ql/src/test/results/clientpositive/vectorization_14.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out index 6fe8661..b742ede 100644 --- ql/src/test/results/clientpositive/vectorization_15.q.out +++ ql/src/test/results/clientpositive/vectorization_15.q.out @@ -108,6 +108,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/vectorization_16.q.out ql/src/test/results/clientpositive/vectorization_16.q.out index f19dc30..272aee0 100644 --- ql/src/test/results/clientpositive/vectorization_16.q.out +++ ql/src/test/results/clientpositive/vectorization_16.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/vectorization_2.q.out ql/src/test/results/clientpositive/vectorization_2.q.out index 75c205b..30140fb 100644 --- ql/src/test/results/clientpositive/vectorization_2.q.out +++ ql/src/test/results/clientpositive/vectorization_2.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_3.q.out ql/src/test/results/clientpositive/vectorization_3.q.out index 6d49d67..7f6ecf7 100644 --- ql/src/test/results/clientpositive/vectorization_3.q.out +++ ql/src/test/results/clientpositive/vectorization_3.q.out @@ -95,6 +95,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single 
COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_4.q.out ql/src/test/results/clientpositive/vectorization_4.q.out index 0ec3de4..210e44f 100644 --- ql/src/test/results/clientpositive/vectorization_4.q.out +++ ql/src/test/results/clientpositive/vectorization_4.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_5.q.out ql/src/test/results/clientpositive/vectorization_5.q.out index e0bfa72..e15c835 100644 --- ql/src/test/results/clientpositive/vectorization_5.q.out +++ ql/src/test/results/clientpositive/vectorization_5.q.out @@ -83,6 +83,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_9.q.out ql/src/test/results/clientpositive/vectorization_9.q.out index f19dc30..272aee0 100644 --- ql/src/test/results/clientpositive/vectorization_9.q.out +++ ql/src/test/results/clientpositive/vectorization_9.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index 274bae5..d5da792 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -259,6 +259,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -443,6 +445,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH 
projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -760,6 +764,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/vectorization_nested_udf.q.out index 831cd4e..bce0897 100644 --- ql/src/test/results/clientpositive/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/vectorization_nested_udf.q.out @@ -42,6 +42,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index 6e0b719..ffc7bbd 100644 --- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -1260,6 +1260,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index 762f381..6da4e7b 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -99,6 +99,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out index 3e34e9c..8d54e16 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out @@ -122,6 +122,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out 
ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out index 0c99a0a..51d8afd 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out @@ -139,6 +139,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -320,6 +322,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -501,6 +505,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out index b37298b..308ae96 100644 --- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out @@ -372,6 +372,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: _col0 (type: tinyint) diff --git ql/src/test/results/clientpositive/vectorized_timestamp.q.out ql/src/test/results/clientpositive/vectorized_timestamp.q.out index e6d2666..1e53c91 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp.q.out @@ -143,6 +143,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -339,6 +341,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -450,6 +454,8 @@ STAGE PLANS: className: VectorGroupByOperator 
groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 7609083..cb8aae2 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -954,6 +954,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1065,6 +1067,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1189,6 +1193,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java index 62f59af..3dadef6 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java @@ -24,6 +24,7 @@ import java.util.Deque; import java.util.List; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; @@ -98,12 +99,24 @@ * Use this constructor when only ascending sort order is used. 
*/ public BinarySortableDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer) { - this(typeInfos, useExternalBuffer, null, null, null); + this(typeInfos, null, useExternalBuffer, null, null, null); + } + + public BinarySortableDeserializeRead(TypeInfo[] typeInfos, + DataTypePhysicalVariation[] dataTypePhysicalVariations, boolean useExternalBuffer) { + this(typeInfos, dataTypePhysicalVariations, useExternalBuffer, null, null, null); } public BinarySortableDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer, - boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker) { - super(typeInfos, useExternalBuffer); + boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker) { + this( + typeInfos, null, useExternalBuffer, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); + } + public BinarySortableDeserializeRead(TypeInfo[] typeInfos, + DataTypePhysicalVariation[] dataTypePhysicalVariations, boolean useExternalBuffer, + boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker) { + super(typeInfos, dataTypePhysicalVariations, useExternalBuffer); final int count = typeInfos.length; root = new Field(); diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index 0e147be..38d1710 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -26,10 +26,13 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; +import org.apache.commons.lang.StringUtils; import org.apache.tools.ant.BuildException; import org.apache.tools.ant.Task; @@ -1261,6 +1264,82 @@ //template, , , {"VectorUDAFVarMerge", "VectorUDAFVarPartial2", "PARTIAL2"}, {"VectorUDAFVarMerge", "VectorUDAFVarFinal", "FINAL"}, + + // Native Vector GROUP BY Single "COUNT" Aggregation. + {"GroupByHashSingleKeyCountColumnOperator", "VectorGroupByHash", "Long", "CountColumn", "Count"}, + {"GroupByHashSingleKeyCountColumnOperator", "VectorGroupByHash", "String", "CountColumn", "Count"}, + {"GroupByHashSingleKeyCountColumnOperator", "VectorGroupByHash", "Single", "CountColumn", "Count"}, + + {"GroupByHashSingleKeyCountKeyOperator", "VectorGroupByHash", "Long", "CountKey", "Count"}, + {"GroupByHashSingleKeyCountKeyOperator", "VectorGroupByHash", "String", "CountKey", "Count"}, + {"GroupByHashSingleKeyCountKeyOperator", "VectorGroupByHash", "Single", "CountKey", "Count"}, + + {"GroupByHashSingleKeyCountStarOperator", "VectorGroupByHash", "Long", "CountStar", "Count"}, + {"GroupByHashSingleKeyCountStarOperator", "VectorGroupByHash", "String", "CountStar", "Count"}, + {"GroupByHashSingleKeyCountStarOperator", "VectorGroupByHash", "Single", "CountStar", "Count"}, + + // Use Long key base class for Decimal64 key. 
+ {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "CountColumn", ""}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "CountKey", ""}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "CountStar", ""}, + + {"GroupByHashMultiKeyCountColumnOperator", "VectorGroupByHash", "Multi", "CountColumn", "Count"}, + {"GroupByHashMultiKeyCountKeyOperator", "VectorGroupByHash", "Multi", "CountKey", "Count"}, + {"GroupByHashMultiKeyCountStarOperator", "VectorGroupByHash", "Multi", "CountStar", "Count"}, + + + // Native Vector GROUP BY Duplicate Reduction. + {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "Long", "DuplicateReduction", "DuplicateReduction"}, + {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "String", "DuplicateReduction", "DuplicateReduction"}, + {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "Single", "DuplicateReduction", "DuplicateReduction"}, + + // Use Long key base class for Decimal64 key. + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "DuplicateReduction", ""}, + + {"GroupByHashMultiKeyDuplicateReductionOperator", "VectorGroupByHash", "Multi", "DuplicateReduction", "DuplicateReduction"}, + + + // Native Vector GROUP BY Single {MIN|MAX|SUM} Aggregation. + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "Long", "LongMaxColumn", "Max", "Long"}, + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "String", "LongMaxColumn", "Max", "Long"}, + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "Single", "LongMaxColumn", "Max", "Long"}, + + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "Long", "LongMinColumn", "Min", "Long"}, + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "String", "LongMinColumn", "Min", "Long"}, + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "Single", "LongMinColumn", "Min", "Long"}, + + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "Long", "LongSumColumn", "Sum", "Long"}, + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "String", "LongSumColumn", "Sum", "Long"}, + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "Single", "LongSumColumn", "Sum", "Long"}, + + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "Long", "Decimal64SumColumn", "Sum", "Decimal64"}, + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "String", "Decimal64SumColumn", "Sum", "Decimal64"}, + {"GroupByHashSingleKeyWordAggrColumnOperator", "VectorGroupByHash", "Single", "Decimal64SumColumn", "Sum", "Decimal64"}, + + // Use Long key base class for Decimal64 key. 
+ {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "LongMaxColumn", ""}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "LongMinColumn", ""}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "LongSumColumn", ""}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "Decimal64SumColumn", ""}, + + {"GroupByHashMultiKeyWordAggrColumnOperator", "VectorGroupByHash", "Multi", "LongMaxColumn", "Max", "Long"}, + {"GroupByHashMultiKeyWordAggrColumnOperator", "VectorGroupByHash", "Multi", "LongMinColumn", "Min", "Long"}, + {"GroupByHashMultiKeyWordAggrColumnOperator", "VectorGroupByHash", "Multi", "LongSumColumn", "Sum", "Long"}, + {"GroupByHashMultiKeyWordAggrColumnOperator", "VectorGroupByHash", "Multi", "Decimal64SumColumn", "Sum", "Decimal64"}, + + // Same key variant, but use Long aggregation base class for Decimal64 Min/Max. + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "", "LongMaxColumn", "Decimal64MaxColumn"}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "", "LongMinColumn", "Decimal64MinColumn"}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "String", "", "LongMaxColumn", "Decimal64MaxColumn"}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "String", "", "LongMinColumn", "Decimal64MinColumn"}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Single", "", "LongMaxColumn", "Decimal64MaxColumn"}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Single", "", "LongMinColumn", "Decimal64MinColumn"}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Multi", "", "LongMaxColumn", "Decimal64MaxColumn"}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Multi", "", "LongMinColumn", "Decimal64MinColumn"}, + + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "LongMaxColumn", "Decimal64MaxColumn"}, + {"GroupByHashDecimal64KeyOperator", "VectorGroupByHash", "Long", "Decimal64", "LongMinColumn", "Decimal64MinColumn"}, + }; @@ -1273,6 +1352,11 @@ private String udafOutputDirectory; private String udafClassesDirectory; private String udafTemplateDirectory; + + private String groupByOperatorOutputDirectory; + private String groupByOperatorClassesDirectory; + private String groupByOperatorTemplateDirectory; + private GenVectorTestCode testCodeGen; static String joinPath(String...parts) { @@ -1309,6 +1393,16 @@ public void init(String templateBaseDir, String buildDir) { udafTemplateDirectory = joinPath(generationDirectory.getAbsolutePath(), "UDAFTemplates"); + String groupByOperator = joinPath("org", "apache", "hadoop", + "hive", "ql", "exec", "vector", "groupby", "operator", "gen"); + File groupByOperatorOutput = new File(joinPath(buildPath, groupByOperator)); + File groupByOperatorClasses = new File(joinPath(compiledPath, groupByOperator)); + groupByOperatorOutputDirectory = groupByOperatorOutput.getAbsolutePath(); + groupByOperatorClassesDirectory = groupByOperatorClasses.getAbsolutePath(); + + groupByOperatorTemplateDirectory = + joinPath(generationDirectory.getAbsolutePath(), "GroupByOperatorTemplates"); + File testCodeOutput = new File( joinPath(buildDir, "generated-test-sources", "java", "org", @@ -1556,6 +1650,21 @@ private void generate() throws Exception { } else if (tdesc[0].equals("TimestampArithmeticDate")) { generateTimestampArithmeticDate(tdesc); + } else if ( + tdesc[0].equals("GroupByHashSingleKeyOperatorBase") || + 
tdesc[0].equals("GroupByHashSingleKeyCountColumnOperator") || + tdesc[0].equals("GroupByHashSingleKeyCountKeyOperator") || + tdesc[0].equals("GroupByHashSingleKeyCountStarOperator") || + tdesc[0].equals("GroupByHashMultiKeyCountColumnOperator") || + tdesc[0].equals("GroupByHashMultiKeyCountKeyOperator") || + tdesc[0].equals("GroupByHashMultiKeyCountStarOperator") || + tdesc[0].equals("GroupByHashSingleKeyDuplicateReductionOperator") || + tdesc[0].equals("GroupByHashMultiKeyDuplicateReductionOperator") || + tdesc[0].equals("GroupByHashSingleKeyWordAggrColumnOperator") || + tdesc[0].equals("GroupByHashMultiKeyWordAggrColumnOperator")) { + generateGroupByOperator(tdesc); + } else if (tdesc[0].equals("GroupByHashDecimal64KeyOperator")) { + generateGroupByHashDecimal64KeyOperator(tdesc); } else { continue; } @@ -3731,16 +3840,96 @@ private static boolean isTimestampIntervalType(String type) { || type.equals("interval_day_time")); } - private boolean containsDefinedStrings(Set defineSet, String commaDefinedString) { - String[] definedStrings = commaDefinedString.split(","); - boolean result = false; - for (String definedString : definedStrings) { - if (defineSet.contains(definedString)) { - result = true; - break; - } + private void generateGroupByOperator(String[] tdesc) throws Exception { + String templateName = tdesc[0]; + String classNamePrefix = tdesc[1]; + String keyVariation = tdesc[2]; + String action = tdesc[3]; + String aggregationVariation = tdesc[4]; + final boolean isAggregate = + (aggregationVariation.equals("Min") || + aggregationVariation.equals("Max") || + aggregationVariation.equals("Sum")); + final String logicalAggregateDataType; + final String hiveAggregateDataType; + final String aggregateColumnVectorType; + if (isAggregate) { + logicalAggregateDataType = tdesc[5]; + hiveAggregateDataType = + logicalAggregateDataType.equals("Decimal64") ? 
+ "long" : logicalAggregateDataType.toLowerCase(); + aggregateColumnVectorType = logicalAggregateDataType + "ColumnVector"; + } else { + logicalAggregateDataType = ""; + hiveAggregateDataType = ""; + aggregateColumnVectorType = ""; + } + + //Read the template into a string; + String className = classNamePrefix + keyVariation + "Key" + action + "Operator"; + File templateFile = + new File(joinPath(this.groupByOperatorTemplateDirectory, templateName + ".txt")); + String templateString = readFile(templateFile); + + String keyDefineName = keyVariation.toUpperCase() + "_KEY"; + String defineName = keyDefineName; + final String aggregateDefineName; + if (isAggregate) { + aggregateDefineName = + logicalAggregateDataType.toUpperCase() + "_" + aggregationVariation.toUpperCase(); + defineName += "," + aggregateDefineName; + } else { + aggregateDefineName = "NONE"; + } + templateString = evaluateIfDefined(templateString, defineName, + this.groupByOperatorTemplateDirectory); + + templateString = templateString.replaceAll("", className); + final String keyColumnVectorType; + if (keyVariation.equals("Long") || keyVariation.equals("Decimal64")) { + keyColumnVectorType = "LongColumnVector"; + } else if (keyVariation.equals("String")) { + keyColumnVectorType = "BytesColumnVector"; + } else { + keyColumnVectorType = "ColumnVector"; } - return result; + templateString = templateString.replaceAll("", keyVariation); + templateString = templateString.replaceAll("", keyVariation.toLowerCase()); + templateString = templateString.replaceAll("", aggregationVariation); + templateString = templateString.replaceAll("", aggregationVariation.toLowerCase()); + templateString = templateString.replaceAll("", keyColumnVectorType); + templateString = templateString.replaceAll("", hiveAggregateDataType); + templateString = templateString.replaceAll("", aggregateColumnVectorType); + templateString = templateString.replaceAll("", aggregateDefineName); + + writeFile(templateFile.lastModified(), groupByOperatorOutputDirectory, groupByOperatorClassesDirectory, + className, templateString); + } + + private void generateGroupByHashDecimal64KeyOperator(String[] tdesc) throws Exception { + String templateName = tdesc[0]; + String classNamePrefix = tdesc[1]; + String currentKeyVariation = tdesc[2]; + String newKeyVariation = tdesc[3]; + String currentAction = tdesc[4]; + String newAction = tdesc[5]; + + String baseClassName = + classNamePrefix + currentKeyVariation + "Key" + currentAction + "Operator"; + String className = + classNamePrefix + + (newKeyVariation.length() > 0 ? newKeyVariation : currentKeyVariation) + "Key" + + (newAction.length() > 0 ? 
+  private void generateGroupByHashDecimal64KeyOperator(String[] tdesc) throws Exception {
+    String templateName = tdesc[0];
+    String classNamePrefix = tdesc[1];
+    String currentKeyVariation = tdesc[2];
+    String newKeyVariation = tdesc[3];
+    String currentAction = tdesc[4];
+    String newAction = tdesc[5];
+
+    String baseClassName =
+        classNamePrefix + currentKeyVariation + "Key" + currentAction + "Operator";
+    String className =
+        classNamePrefix +
+        (newKeyVariation.length() > 0 ? newKeyVariation : currentKeyVariation) + "Key" +
+        (newAction.length() > 0 ? newAction : currentAction) + "Operator";
+
+    File templateFile =
+        new File(joinPath(this.groupByOperatorTemplateDirectory, templateName + ".txt"));
+    String templateString = readFile(templateFile);
+
+    templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
+    templateString = templateString.replaceAll("<ClassName>", className);
+
+    writeFile(templateFile.lastModified(), groupByOperatorOutputDirectory,
+        groupByOperatorClassesDirectory, className, templateString);
   }
 
   private boolean matchesDefinedStrings(Set<String> defineSet, Set<String> newIfDefinedSet,
@@ -3797,27 +3986,43 @@ private IfDefinedMode parseIfDefinedMode(String newIfDefinedString, Set<String>
     return ifDefinedMode;
   }
 
-  private int doIfDefinedStatement(String[] lines, int index, Set<String> desiredIfDefinedSet,
-      boolean outerInclude, StringBuilder sb) {
-    String ifLine = lines[index];
+  private int doIfDefinedStatement(List<String> linesList, int index,
+      Set<String> desiredIfDefinedSet, boolean outerInclude,
+      List<String> ifDefinedEvaluatedLinesList, boolean isExactFilter, boolean filterPredicate) {
+    String ifLine = linesList.get(index);
     final int ifLineNumber = index + 1;
     String ifDefinedString = ifLine.substring("#IF ".length());
     Set<String> ifDefinedSet = new HashSet<String>();
     IfDefinedMode ifDefinedMode = parseIfDefinedMode(ifDefinedString, ifDefinedSet);
-    boolean includeBody = matchesDefinedStrings(desiredIfDefinedSet, ifDefinedSet, ifDefinedMode);
+
+    boolean includeBody;
+    final boolean isExactMatch;
+    if (isExactFilter) {
+
+      // Normally, we throw away any #IF statements that don't match the desired set.
+      // But optionally, we filter on exact #IF/#ELSE/#ENDIF statements and let all
+      // others through.
+      isExactMatch = desiredIfDefinedSet.equals(ifDefinedSet);
+      if (isExactMatch) {
+        includeBody = filterPredicate;
+      } else {
+        includeBody = true;
+      }
+    } else {
+      includeBody = matchesDefinedStrings(desiredIfDefinedSet, ifDefinedSet, ifDefinedMode);
+      isExactMatch = false;
+    }
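+    // Template shape this parser consumes (defined-string name illustrative,
+    // following the keyDefineName convention above):
+    //
+    //   #IF LONG_KEY
+    //   ... lines for long keys ...
+    //   #ELSE
+    //   ... alternate lines ...
+    //   #ENDIF LONG_KEY
+    //
+    // In exact-filter mode, only a #IF whose defined strings equal the desired set
+    // is evaluated (kept or dropped per filterPredicate); every other #IF block is
+    // passed through untouched for a later evaluation pass.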
     index++;
-    final int end = lines.length;
+    final int end = linesList.size();
     while (true) {
       if (index >= end) {
         throw new RuntimeException("Unmatched #IF at line " + index + " for " + ifDefinedString);
       }
-      String line = lines[index];
+      String line = linesList.get(index);
       if (line.length() == 0 || line.charAt(0) != '#') {
         if (outerInclude && includeBody) {
-          sb.append(line);
-          sb.append("\n");
+          ifDefinedEvaluatedLinesList.add(line);
         }
         index++;
         continue;
       }
@@ -3828,10 +4033,18 @@ private int doIfDefinedStatement(String[] lines, int index, Set<String> desiredI
 
       // Recurse.
       index = doIfDefinedStatement(
-          lines, index, desiredIfDefinedSet, outerInclude && includeBody, sb);
+          linesList, index, desiredIfDefinedSet, outerInclude && includeBody,
+          ifDefinedEvaluatedLinesList, isExactFilter, filterPredicate);
       } else if (line.equals("#ELSE")) {
+        // Flip inclusion.
-        includeBody = !includeBody;
+        if (isExactFilter) {
+          if (isExactMatch) {
+            includeBody = !includeBody;
+          }
+        } else {
+          includeBody = !includeBody;
+        }
         index++;
       } else if (line.equals("#ENDIF")) {
         throw new RuntimeException("Missing defined strings with #ENDIF on line " + (index + 1));
@@ -3843,48 +4056,355 @@ private int doIfDefinedStatement(String[] lines, int index, Set<String> desiredI
             " do not match \"" + ifDefinedString + "\" (line " + (index + 1) + ")");
         }
         return ++index;
+      } else if (
+          !line.startsWith("#BEGIN_LINES ") &&
+          !line.startsWith("#END_LINES") &&
+          !line.startsWith("#USE_LINES ") &&
+          !line.startsWith("#COMMENT")) {
+        throw new RuntimeException(
+            "Problem with #IF #ELSE #ENDIF on line " + (index + 1) + ": " + line);
+      } else {
+        if (outerInclude && includeBody) {
+          ifDefinedEvaluatedLinesList.add(line);
+        }
+        index++;
+        continue;
+      }
+    }
+  }
+
+  private void doProcessIfDefined(List<String> linesList, int index, Set<String> definedSet,
+      boolean outerInclude, List<String> ifDefinedEvaluatedLinesList,
+      boolean isExactFilter, boolean predicate) {
+    final int end = linesList.size();
+    while (true) {
+      if (index >= end) {
+        break;
+      }
+      String line = linesList.get(index);
+      if (line.length() == 0 || line.charAt(0) != '#') {
+        if (outerInclude) {
+          ifDefinedEvaluatedLinesList.add(line);
+        }
+        index++;
+        continue;
+      }
+
+      if (line.startsWith("#IF ")) {
+
+        // A pound # statement (#IF #ELSE #ENDIF).
+        index =
+            doIfDefinedStatement(
+                linesList, index, definedSet, outerInclude,
+                ifDefinedEvaluatedLinesList, isExactFilter, predicate);
+      } else if (
+          !line.startsWith("#BEGIN_LINES ") &&
+          !line.startsWith("#END_LINES") &&
+          !line.startsWith("#USE_LINES ") &&
+          !line.startsWith("#COMMENT")) {
+        throw new RuntimeException(
+            "Problem with #IF #ELSE #ENDIF on line " + (index + 1) + ": " + line);
       } else {
-        throw new RuntimeException("Problem with #IF/#ELSE/#ENDIF on line " + (index + 1) + ": " + line);
+        if (outerInclude) {
+          ifDefinedEvaluatedLinesList.add(line);
+        }
+        index++;
       }
     }
   }
 
-  private void doEvaluateIfDefined(String[] lines, int index, Set<String> definedSet,
-      boolean outerInclude, StringBuilder sb) {
-    final int end = lines.length;
-    while (true) {
-      if (index >= end) {
-        break;
+  private void doUseLinesCollectAndFilter(List<String> linesList,
+      Map<String, List<String>> useLinesMap, List<String> filteredLinesList) {
+
+    int index = 0;
+    final int size = linesList.size();
+    while (true) {
+
+      if (index >= size) {
+        return;
+      }
+      String line = linesList.get(index);
+      if (line.startsWith("#BEGIN_LINES ")) {
+
+        final int beginLineIndex = index;
+        String linesTitle = line.substring("#BEGIN_LINES ".length());
+        if (useLinesMap.containsKey(linesTitle)) {
+          throw new RuntimeException(
+              "Problem #BEGIN_LINES that started at " + beginLineIndex +
+              " -- duplicate name " + linesTitle);
         }
-      String line = lines[index];
-      if (line.length() == 0 || line.charAt(0) != '#') {
-        if (outerInclude) {
-          sb.append(line);
-          sb.append("\n");
+        while (true) {
+          if (index >= size) {
+            throw new RuntimeException(
+                "Problem #BEGIN_LINES that started at " + beginLineIndex +
+                " -- no matching #END_LINES found");
+          }
+          line = linesList.get(index);
+          if (line.startsWith("#END_LINES")) {
+            useLinesMap.put(linesTitle, linesList.subList(beginLineIndex + 1, index));
+            break;
           }
           index++;
-        continue;
         }
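+        // Collected group shape (name illustrative):
+        //
+        //   #BEGIN_LINES keyLoopBody
+        //   ... reusable template lines ...
+        //   #END_LINES
+        //
+        // The lines between the markers are saved under the name and replayed
+        // wherever a matching #USE_LINES directive appears.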
+      } else if (line.startsWith("#COMMENT")) {
+        // Filter out comment lines.
+      } else {
+        filteredLinesList.add(line);
+      }
+      index++;
+    }
+  }
+
+  private void doUseLinesApply(List<String> linesList, Map<String, List<String>> useLinesMap,
+      List<String> resultLinesList) {
+
+    int index = 0;
+    final int size = linesList.size();
+    while (true) {
+
+      if (index >= size) {
+        return;
+      }
+      String line = linesList.get(index);
+      if (line.startsWith("#USE_LINES ")) {
+
+        String linesTitle = line.substring("#USE_LINES ".length());
+        final int blankCharIndex = linesTitle.indexOf(" ");
+        int pad = 0;
+        if (blankCharIndex != -1) {
+          String remainder = linesTitle.substring(blankCharIndex + 1);
+          linesTitle = linesTitle.substring(0, blankCharIndex);
+          if (!remainder.startsWith("+")) {
+            throw new RuntimeException(
+                "Problem #USE_LINES that started at " + index +
+                " -- expecting + sign for indent");
+          }
+          String padString = remainder.substring(1);
+          pad = Integer.parseInt(padString);
+        }
+        List<String> useLines = useLinesMap.get(linesTitle);
+        if (useLines == null) {
+          throw new RuntimeException(
+              "Problem #USE_LINES that started at " + index +
+              " -- name " + linesTitle + " not found");
+        }
+        if (pad == 0) {
+          resultLinesList.addAll(useLines);
+        } else {
+          String padoutString = StringUtils.leftPad("", pad);
+          for (String useLine : useLines) {
+            if (useLine.length() > 0) {
+              resultLinesList.add(padoutString + useLine);
+            } else {
+              // Do not pad out empty lines.
+              resultLinesList.add(useLine);
+            }
+          }
+        }
+      } else {
+        resultLinesList.add(line);
+      }
+      index++;
+    }
+  }
+
+  private void doIncludeProcessing(String[] lines, String templateDirectory,
+      List<String> resultList) throws IOException {
+
+    // Just one level: #INCLUDE directives inside included files are not expanded.
+    int index = 0;
+    final int size = lines.length;
+    while (true) {
+
+      if (index >= size) {
+        return;
+      }
+      String line = lines[index];
+      if (line.startsWith("#INCLUDE ")) {
+        String includeFileName = line.substring("#INCLUDE ".length());
+        File includeFile;
+        String includeString;
+        final int blankCharIndex = includeFileName.indexOf(" ");
+        if (blankCharIndex != -1) {
+          String remainder = includeFileName.substring(blankCharIndex + 1);
+          includeFileName = includeFileName.substring(0, blankCharIndex);
+
+          includeFile =
+              new File(joinPath(templateDirectory, includeFileName + ".txt"));
+          includeString = readFile(includeFile);
+
+          // Process optional comma separated parameters.
+          String[] parameters = remainder.split(",");
+          List<String> filterIfDefinedList = new ArrayList<String>();
+          List<Boolean> filterIfPredicateList = new ArrayList<Boolean>();
+          List<String> substitutionNames = new ArrayList<String>();
+          List<String> substitutions = new ArrayList<String>();
+          for (String parameter : parameters) {
+            char firstChar = parameter.charAt(0);
+            if (Character.isUpperCase(firstChar)) {
+
+              // #IF filter.
+              final int equalsCharIndex = parameter.indexOf("=");
+              if (equalsCharIndex == -1) {
+                throw new RuntimeException(
+                    "Problem #INCLUDE #IF filter " + index +
+                    " -- no '='");
+              }
+              String filterIfDefinedName = parameter.substring(0, equalsCharIndex);
+              String predicateString = parameter.substring(equalsCharIndex + 1);
+              final boolean predicate;
+              if (predicateString.equalsIgnoreCase("true")) {
+                predicate = true;
+              } else if (predicateString.equalsIgnoreCase("false")) {
+                predicate = false;
+              } else {
+                throw new RuntimeException(
+                    "Problem #INCLUDE #IF filter " + index +
+                    " -- expecting 'true' or 'false'");
+              }
+
+              filterIfDefinedList.add(filterIfDefinedName);
+              filterIfPredicateList.add(predicate);
+            } else if (firstChar == '<') {
+
+              // Substitution.
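+              // Parameter shape enforced by the checks below (name illustrative):
+              //
+              //   <SomeName>="replacement text"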
+              final int closeCharIndex = parameter.indexOf(">");
+              if (closeCharIndex == -1) {
+                throw new RuntimeException(
+                    "Problem #INCLUDE substitution specification " + index +
+                    " -- no '>'");
+              }
+              // Keep the <> around the name.
+              String substitutionName = parameter.substring(0, closeCharIndex + 1);
+
+              char equalsChar = parameter.charAt(closeCharIndex + 1);
+              if (equalsChar != '=') {
+                throw new RuntimeException(
+                    "Problem #INCLUDE substitution specification " + index +
+                    " -- not '='");
+              }
+              final int substitutionIndex = closeCharIndex + 2;
+              char startQuote = parameter.charAt(substitutionIndex);
+              if (startQuote != '"') {
+                throw new RuntimeException(
+                    "Problem #INCLUDE substitution specification " + index +
+                    " -- missing start quote '\"'");
+              }
+              final int parameterSize = parameter.length();
+              char endQuote = parameter.charAt(parameterSize - 1);
+              if (endQuote != '"') {
+                throw new RuntimeException(
+                    "Problem #INCLUDE substitution specification " + index +
+                    " -- missing end quote '\"'");
+              }
+              String substitution = parameter.substring(substitutionIndex + 1, parameterSize - 1);
+
+              substitutionNames.add(substitutionName);
+              substitutions.add(substitution);
+            }
+          }
 
-      // A pound # statement (IF/ELSE/ENDIF).
-      if (line.startsWith("#IF ")) {
-        index = doIfDefinedStatement(lines, index, definedSet, outerInclude, sb);
+          // Example:
+          //
+          //   #INCLUDE file LOGICAL_BATCH_PROCESSING=true,<Name>="Logical",<name>="logical"
+          //
+          final int filterCount = filterIfDefinedList.size();
+          for (int f = 0; f < filterCount; f++) {
+
+            // Only process any #IF/#ELSE/#ENDIF that are exact matches.
+            includeString =
+                exactFilterIfDefined(
+                    includeString, filterIfDefinedList.get(f), filterIfPredicateList.get(f));
+          }
+          final int substitutionCount = substitutionNames.size();
+          for (int s = 0; s < substitutionCount; s++) {
+            includeString =
+                includeString.replaceAll(
+                    substitutionNames.get(s), substitutions.get(s));
+          }
         } else {
-        throw new RuntimeException("Problem with #IF/#ELSE/#ENDIF on line " + (index + 1) + ": " + line);
+          includeFile =
+              new File(joinPath(templateDirectory, includeFileName + ".txt"));
+          includeString = readFile(includeFile);
         }
+        String[] includeLines = includeString.split("\n");
+        List<String> includeLinesList = Arrays.asList(includeLines);
+        resultList.addAll(includeLinesList);
+      } else {
+        resultList.add(line);
       }
+      index++;
+    }
+  }
+
+  private String processIfDefined(String linesString, List<String> definedList,
+      String templateDirectory) throws IOException {
+    return processIfDefined(
+        linesString, definedList, templateDirectory,
+        /* isExactFilter */ false, /* filterPredicate */ false);
   }
 
-  private String evaluateIfDefined(String linesString, List<String> definedList) {
+  private String processIfDefined(String linesString, List<String> definedList,
+      String templateDirectory, boolean isExactFilter, boolean filterPredicate) throws IOException {
+
     String[] lines = linesString.split("\n");
     Set<String> definedSet = new HashSet<String>(definedList);
+
+    List<String> includedLinesList;
+    if (templateDirectory == null) {
+      includedLinesList = Arrays.asList(lines);
+    } else {
+      includedLinesList = new ArrayList<String>();
+      doIncludeProcessing(lines, templateDirectory, includedLinesList);
+    }
+
+    List<String> ifDefinedEvaluatedLinesList = new ArrayList<String>();
+    doProcessIfDefined(
+        includedLinesList, 0, definedSet, true, ifDefinedEvaluatedLinesList,
+        isExactFilter, filterPredicate);
+
+    Map<String, List<String>> useLinesMap = new HashMap<String, List<String>>();
+    List<String> filteredLinesList = new ArrayList<String>();
+    doUseLinesCollectAndFilter(ifDefinedEvaluatedLinesList, useLinesMap, filteredLinesList);
+
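+    // Replay collected #BEGIN_LINES/#END_LINES groups. A #USE_LINES directive may
+    // carry an extra indent, e.g. (illustrative): "#USE_LINES keyLoopBody +4" pads
+    // each replayed non-empty line with four blanks.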
+    List<String> resultLinesList;
+    if (useLinesMap.isEmpty()) {
+      resultLinesList = filteredLinesList;
+    } else {
+      resultLinesList = new ArrayList<String>();
+      doUseLinesApply(filteredLinesList, useLinesMap, resultLinesList);
+    }
+
     StringBuilder sb = new StringBuilder();
-    doEvaluateIfDefined(lines, 0, definedSet, true, sb);
+    for (String line : resultLinesList) {
+      sb.append(line);
+      sb.append("\n");
+    }
     return sb.toString();
   }
 
-  private String evaluateIfDefined(String linesString, String definedString) {
-    return evaluateIfDefined(linesString, Arrays.asList(definedString.split(",")));
+  private String evaluateIfDefined(String linesString, List<String> definedList)
+      throws IOException {
+    return processIfDefined(linesString, definedList, null);
+  }
+
+  private String evaluateIfDefined(String linesString, String definedString)
+      throws IOException {
+    return processIfDefined(
+        linesString, Arrays.asList(definedString.split(",")), null);
+  }
+
+  private String exactFilterIfDefined(String linesString, String definedString,
+      boolean filterPredicate) throws IOException {
+    return processIfDefined(
+        linesString, Arrays.asList(definedString.split(",")), null, true, filterPredicate);
+  }
+
+  private String evaluateIfDefined(String linesString, String definedString,
+      String templateDirectory) throws IOException {
+    return processIfDefined(
+        linesString, Arrays.asList(definedString.split(",")), templateDirectory);
   }
 
   static void writeFile(long templateTime, String outputDir, String classesDir,