diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4fb8a30..c7dc674 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3596,6 +3596,23 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "Exceeding this will trigger a flush irrelevant of memory pressure condition."),
     HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1,
         "Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded."),
+    HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED(
+        "hive.vectorized.execution.groupby.native.enabled", true,
+        "This flag should be set to true to enable the native vectorization of queries using GroupBy.\n" +
+        "The default value is true."),
+    HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE(
+        "hive.test.vectorized.execution.groupby.native.override",
+        "none", new StringSet("none", "enable", "disable"),
+        "Internal use only; overrides the hive.vectorized.execution.groupby.native.enabled\n" +
+        "setting: enable forces it on and disable forces it off.\n" +
+        "The default, none, leaves the setting unchanged.",
+        true),
+    HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE(
+        "hive.test.vectorized.groupby.native.max.memory.available", -1,
+        "Internal use only; used to create different vectorized hash table sizes\n" +
+        "to exercise more code paths.\n" +
+        "The default value is -1, which means the setting is not used.",
+        true),
     HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED("hive.vectorized.execution.reducesink.new.enabled", true,
         "This flag should be set to true to enable the new vectorization\n" +
         "of queries using ReduceSink.\ni" +
diff --git data/files/groupby_decimal64_1a.txt data/files/groupby_decimal64_1a.txt
new file mode 100644
index 0000000..dbe0d86
--- /dev/null
+++ data/files/groupby_decimal64_1a.txt
@@ -0,0 +1,18 @@
+55.33
+44.2
+435.33
+324.33
+324.33
+-0.342
+44.2
+55.3
+55.3
+0.0
+66.4
+23.22
+-87.2
+\N
+33.44
+55.3
+435.331
+-0.342
\ No newline at end of file
diff --git data/files/groupby_decimal64_1a_nonull.txt data/files/groupby_decimal64_1a_nonull.txt
new file mode 100644
index 0000000..16ae9e4
--- /dev/null
+++ data/files/groupby_decimal64_1a_nonull.txt
@@ -0,0 +1,17 @@
+55.33
+44.2
+435.33
+324.33
+324.33
+-0.342
+44.2
+55.3
+55.3
+0.0
+66.4
+23.22
+-87.2
+33.44
+55.3
+435.331
+-0.342
\ No newline at end of file
diff --git data/files/groupby_decimal64_1b.txt data/files/groupby_decimal64_1b.txt
new file mode 100644
index 0000000..c99fd34
--- /dev/null
+++ data/files/groupby_decimal64_1b.txt
@@ -0,0 +1,17 @@
+4143-07-08 10:53:27.252,3566.02
+5339-02-01 14:10:01.0,7286.29
+5339-02-01 14:10:01.0,2755.40
+2003-09-23 22:33:17.00003252,2516.50
+5397-07-13 07:12:32.000896438,16966.99
+4143-07-08 10:53:27.252,16966.0
+4143-07-08 10:53:27.252,10402
+2003-09-23 22:33:17.00003252,1735.22
+1966-08-16 13:36:50.1,645.07
+\N,15464.67
+1966-08-16 13:36:50.1,8925.82
+1966-08-16 13:36:50.1,11041.91
+7160-12-02 06:00:24.81,645.93
+1976-05-06 00:42:30.910786948,13831.90
+9075-06-13 16:20:09,9559.53
+1985-07-20 09:30:11.0,\N
+1999-10-03 16:59:10.396903939,2755.9
\ No newline at end of file
diff --git data/files/groupby_decimal64_1b_nonull.txt data/files/groupby_decimal64_1b_nonull.txt
new file mode 100644
index 0000000..974cb9d
--- /dev/null
+++ data/files/groupby_decimal64_1b_nonull.txt
@@ -0,0 +1,16 @@
+4143-07-08 10:53:27.252,3566.02
+5339-02-01
14:10:01.0,7286.29 +5339-02-01 14:10:01.0,2755.40 +2003-09-23 22:33:17.00003252,2516.50 +5397-07-13 07:12:32.000896438,16966.99 +4143-07-08 10:53:27.252,16966.0 +4143-07-08 10:53:27.252,10402 +2003-09-23 22:33:17.00003252,1735.22 +1966-08-16 13:36:50.1,645.07 +\N,15464.67 +1966-08-16 13:36:50.1,8925.82 +1966-08-16 13:36:50.1,11041.91 +7160-12-02 06:00:24.81,645.93 +1976-05-06 00:42:30.910786948,13831.90 +9075-06-13 16:20:09,9559.53 +1999-10-03 16:59:10.396903939,2755.9 \ No newline at end of file diff --git data/files/groupby_long_1a.txt data/files/groupby_long_1a.txt new file mode 100644 index 0000000..8cf831f --- /dev/null +++ data/files/groupby_long_1a.txt @@ -0,0 +1,11 @@ +-5310365297525168078 +-6187919478609154811 +968819023021777205 +3313583664488247651 +-5206670856103795573 +\N +-6187919478609154811 +1569543799237464101 +-6187919478609154811 +-8460550397108077433 +-6187919478609154811 diff --git data/files/groupby_long_1a_nonull.txt data/files/groupby_long_1a_nonull.txt new file mode 100644 index 0000000..b2325ad --- /dev/null +++ data/files/groupby_long_1a_nonull.txt @@ -0,0 +1,10 @@ +1569543799237464101 +-6187919478609154811 +968819023021777205 +-8460550397108077433 +-6187919478609154811 +-5310365297525168078 +-6187919478609154811 +-5206670856103795573 +3313583664488247651 +-6187919478609154811 diff --git data/files/groupby_long_1b.txt data/files/groupby_long_1b.txt new file mode 100644 index 0000000..87c2b3c --- /dev/null +++ data/files/groupby_long_1b.txt @@ -0,0 +1,13 @@ +\N +31713 +31713 +31713 +31713 +32030 +31713 +-25394 +31713 +31713 +31713 +31713 +31713 diff --git data/files/groupby_long_1b_nonull.txt data/files/groupby_long_1b_nonull.txt new file mode 100644 index 0000000..0b438a2 --- /dev/null +++ data/files/groupby_long_1b_nonull.txt @@ -0,0 +1,12 @@ +31713 +31713 +31713 +31713 +32030 +31713 +-25394 +31713 +31713 +31713 +31713 +31713 diff --git data/files/groupby_long_1c.txt data/files/groupby_long_1c.txt new file mode 100644 index 0000000..2d13c26 --- /dev/null +++ data/files/groupby_long_1c.txt @@ -0,0 +1,11 @@ +1928928239,\N +-1437463633,YYXPPCH +-1437463633,TKTKGVGFW +1725068083,MKSCCE +1928928239,\N +\N,ABBZ +1928928239,AMKTIWQ +-1437463633,JU +1928928239,VAQHVRI +-1437463633,SOWDWMS +-1437463633,\N diff --git data/files/groupby_long_1c_nonull.txt data/files/groupby_long_1c_nonull.txt new file mode 100644 index 0000000..f6bc6e8 --- /dev/null +++ data/files/groupby_long_1c_nonull.txt @@ -0,0 +1,10 @@ +1928928239,\N +-1437463633,YYXPPCH +-1437463633,TKTKGVGFW +1725068083,MKSCCE +1928928239,\N +1928928239,AMKTIWQ +-1437463633,JU +1928928239,VAQHVRI +-1437463633,SOWDWMS +-1437463633,\N diff --git data/files/groupby_multi_1a.txt data/files/groupby_multi_1a.txt new file mode 100644 index 0000000..e41458d --- /dev/null +++ data/files/groupby_multi_1a.txt @@ -0,0 +1,56 @@ +2268-07-27,43 +1988-01-10,22 +2083-03-10,51 +2207-09-16,15 +2111-10-04,-81 +2088-05-07,-15 +1833-09-17,16 +2204-06-14,22 +1879-03-14,51 +2025-05-17,51 +2207-04-24,-92 +1809-10-10,-28 +1805-12-21,16 +2207-09-16,\N +2194-06-19,-126 +1971-06-16,24 +2251-08-16,\N +1845-11-11,-126 +1858-09-10,22 +2059-05-11,-39 +1892-05-06,-103 +2207-09-16,-13 +1937-09-06,-126 +1820-12-15,51 +2006-12-15,16 +1892-05-06,-121 +\N,-126 +2268-07-27,-12 +2268-07-27,114 +2151-11-20,16 +2268-07-27,118 +2029-11-21,-75 +1859-01-20,16 +1950-10-06,-39 +2185-07-27,51 +2207-09-16,\N +1892-05-06,61 +2207-09-16,-105 +2268-07-27,-117 +2207-04-24,0 +2207-09-16,124 +2059-05-11,-39 +1805-12-21,16 +1805-12-21,16 +2249-12-20,51 +2207-09-16,116 
+2207-09-16,122 +2064-09-04,-126 +1869-03-17,-126 +1804-02-16,-39 +1960-04-02,-75 +2086-09-20,-69 +\N,\N +2196-04-12,22 +2251-08-16,-94 +2268-07-27,-12 \ No newline at end of file diff --git data/files/groupby_multi_1a_nonull.txt data/files/groupby_multi_1a_nonull.txt new file mode 100644 index 0000000..9542f64 --- /dev/null +++ data/files/groupby_multi_1a_nonull.txt @@ -0,0 +1,55 @@ +2268-07-27,43 +1988-01-10,22 +2083-03-10,51 +2207-09-16,15 +2111-10-04,-81 +2088-05-07,-15 +1833-09-17,16 +2204-06-14,22 +1879-03-14,51 +2025-05-17,51 +2207-04-24,-92 +1809-10-10,-28 +1805-12-21,16 +2207-09-16,\N +2194-06-19,-126 +1971-06-16,24 +2251-08-16,\N +1845-11-11,-126 +1858-09-10,22 +2059-05-11,-39 +1892-05-06,-103 +2207-09-16,-13 +1937-09-06,-126 +1820-12-15,51 +2006-12-15,16 +1892-05-06,-121 +\N,-126 +2268-07-27,-12 +2268-07-27,114 +2151-11-20,16 +2268-07-27,118 +2029-11-21,-75 +1859-01-20,16 +1950-10-06,-39 +2185-07-27,51 +2207-09-16,\N +1892-05-06,61 +2207-09-16,-105 +2268-07-27,-117 +2207-04-24,0 +2207-09-16,124 +2059-05-11,-39 +1805-12-21,16 +1805-12-21,16 +2249-12-20,51 +2207-09-16,116 +2207-09-16,122 +2064-09-04,-126 +1869-03-17,-126 +1804-02-16,-39 +1960-04-02,-75 +2086-09-20,-69 +2196-04-12,22 +2251-08-16,-94 +2268-07-27,-12 \ No newline at end of file diff --git data/files/groupby_serialize_1a.txt data/files/groupby_serialize_1a.txt new file mode 100644 index 0000000..cae1ecc --- /dev/null +++ data/files/groupby_serialize_1a.txt @@ -0,0 +1,17 @@ +2061-12-19 22:10:32.000628309 +\N +2686-05-23 07:46:46.565832918 +2082-07-14 04:00:40.695380469 +2188-06-04 15:03:14.963259704 +2608-02-23 23:44:02.546440891 +2093-04-10 23:36:54.846 +2898-10-01 22:27:02.000871113 +2306-06-21 11:02:00.143124239 +\N +\N +2306-06-21 11:02:00.143124239 +2093-04-10 23:36:54.846 +\N +2686-05-23 07:46:46.565832918 +2093-04-10 23:36:54.846 +2299-11-15 16:41:30.401 diff --git data/files/groupby_serialize_1a_nonull.txt data/files/groupby_serialize_1a_nonull.txt new file mode 100644 index 0000000..0520a9a --- /dev/null +++ data/files/groupby_serialize_1a_nonull.txt @@ -0,0 +1,13 @@ +2061-12-19 22:10:32.000628309 +2686-05-23 07:46:46.565832918 +2082-07-14 04:00:40.695380469 +2188-06-04 15:03:14.963259704 +2608-02-23 23:44:02.546440891 +2093-04-10 23:36:54.846 +2898-10-01 22:27:02.000871113 +2306-06-21 11:02:00.143124239 +2306-06-21 11:02:00.143124239 +2093-04-10 23:36:54.846 +2686-05-23 07:46:46.565832918 +2093-04-10 23:36:54.846 +2299-11-15 16:41:30.401 diff --git data/files/groupby_serialize_1b.txt data/files/groupby_serialize_1b.txt new file mode 100644 index 0000000..c47bae0 --- /dev/null +++ data/files/groupby_serialize_1b.txt @@ -0,0 +1,47 @@ +2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 
20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +\N,\N,\N,-2207.3 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 +2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +\N,\N,\N,-2.4 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 \ No newline at end of file diff --git data/files/groupby_serialize_1b_nonull.txt data/files/groupby_serialize_1b_nonull.txt new file mode 100644 index 0000000..e640b42 --- /dev/null +++ data/files/groupby_serialize_1b_nonull.txt @@ -0,0 +1,66 @@ +2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 
+2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 +2512-10-06 03:03:03,1560,X,-922.6951584107 +2396-04-06 15:39:02.404013577,29661,ZSMB,0.76718326 +2409-09-23 10:33:27,2638,XSXR,0.4 +2969-01-23 14:08:04.000667259,6689,TFGVOGPJF,-0.01 +2333-07-28 09:59:26,23196,RKSK,37872288434740893.5 +2409-09-23 10:33:27,2638,XSXR,-162.95 +2357-05-08 07:09:09.000482799,6226,ZSMB,-472 +2304-12-15 15:31:16,15090,G,-4319470286240016.3 +2304-12-15 15:31:16,1301,T,61.302 +2105-01-04 16:27:45,23100,ZSMB,-83.2328 +2242-08-04 07:51:46.905,20223,UCYXACQ,-0.26149 +2637-03-12 22:25:46.385,-17786,HYEGQ,-84.169614329419 +1931-12-04 11:13:47.269597392,23196,HVJCQMTQL,-9697532.8994 +2897-08-10 15:21:47.09,23663,XYUVBED,6370 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,57.62175257788037 +2145-10-15 06:58:42.831,2638,UANGISEXR,-5996.306 +2462-12-16 23:11:32.633305644,-26373,CB,67.41799 +2396-04-06 15:39:02.404013577,29661,ZSMB,-5151598.347 +2304-12-15 15:31:16,15090,G,975 +2512-10-06 03:03:03,32099,ARNZ,-0.41 +2188-06-04 15:03:14.963259704,9468,AAA,2.75496352 \ No newline at end of file diff --git data/files/groupby_string_1a.txt data/files/groupby_string_1a.txt new file mode 100644 index 0000000..1cbcd05 --- /dev/null +++ data/files/groupby_string_1a.txt @@ -0,0 +1,13 @@ +FTWURVH +QNCYBDW +UA +WXHJ +\N +WXHJ +PXLD +WXHJ +PXLD +WXHJ +WXHJ +MXGDMBD +PXLD diff --git data/files/groupby_string_1a_nonull.txt data/files/groupby_string_1a_nonull.txt new file mode 100644 index 0000000..a6566f2 --- /dev/null +++ data/files/groupby_string_1a_nonull.txt @@ -0,0 +1,12 @@ +WXHJ +WXHJ +FTWURVH +MXGDMBD +UA +WXHJ +QNCYBDW +PXLD +PXLD +WXHJ +PXLD +WXHJ diff --git data/files/groupby_string_1c.txt data/files/groupby_string_1c.txt new file mode 100644 index 0000000..f223da0 --- /dev/null +++ data/files/groupby_string_1c.txt @@ -0,0 +1,38 @@ +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +,1985-01-22,2111-01-10 15:44:28 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +\N,1865-11-08,2893-04-07 07:36:12 +BEP,2206-08-10,2331-10-09 10:59:51 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +IWEZJHKE,\N,\N +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +KL,1980-09-22,2073-08-25 11:51:10.318 +\N,1915-02-22,2554-10-27 09:34:30 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +GOYJHW,1959-04-27,\N +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +LOTLS,2126-09-16,1977-12-15 15:28:56 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +,2021-02-21,2802-04-21 18:48:18.5933838 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +SDA,2196-04-12,2462-10-26 19:28:12.733 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 
+GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +FYW,1807-03-20,2305-08-17 01:32:44 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 diff --git data/files/groupby_string_1c_nonull.txt data/files/groupby_string_1c_nonull.txt new file mode 100644 index 0000000..6b97ef4 --- /dev/null +++ data/files/groupby_string_1c_nonull.txt @@ -0,0 +1,35 @@ +LOTLS,2126-09-16,1977-12-15 15:28:56 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +FYW,1807-03-20,2305-08-17 01:32:44 +,2021-02-21,2802-04-21 18:48:18.5933838 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +BEP,2206-08-10,2331-10-09 10:59:51 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +,1985-01-22,2111-01-10 15:44:28 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +GOYJHW,1959-04-27,\N +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +KL,1980-09-22,2073-08-25 11:51:10.318 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +IWEZJHKE,\N,\N +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java index ca76e6c..859257e 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBench.java @@ -20,6 +20,9 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.openjdk.jmh.profile.LinuxPerfAsmProfiler; +import org.openjdk.jmh.profile.LinuxPerfNormProfiler; +import org.openjdk.jmh.profile.LinuxPerfProfiler; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; @@ -215,6 +218,9 @@ public void setup() throws Exception { public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() .include(".*" + MapJoinMultiKeyBench.class.getSimpleName() + ".*") + .addProfiler(LinuxPerfProfiler.class) + .addProfiler(LinuxPerfNormProfiler.class) + .addProfiler(LinuxPerfAsmProfiler.class) .build(); new Runner(opt).run(); } diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java index aa88297..919cea4 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java 
+++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java @@ -25,9 +25,13 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.openjdk.jmh.annotations.Param; public abstract class MapJoinMultiKeyBenchBase extends AbstractMapJoin { - + + @Param("100000") // 100,000 + protected int rowCount; + public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation) throws Exception { @@ -35,8 +39,6 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, long seed = 2543; - int rowCount = 100000; // 100,000. - String[] bigTableColumnNames = new String[] {"b1", "b2", "b3"}; TypeInfo[] bigTableTypeInfos = new TypeInfo[] { diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java index e13db96..73c6ec5 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBench.java @@ -20,8 +20,12 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.openjdk.jmh.profile.LinuxPerfAsmProfiler; +import org.openjdk.jmh.profile.LinuxPerfNormProfiler; +import org.openjdk.jmh.profile.LinuxPerfProfiler; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; @@ -46,7 +50,7 @@ public static class MapJoinOneLongKeyInnerRowModeHashMapBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_HASH_MAP); } @@ -54,7 +58,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyInnerRowModeOptimized_Bench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); } @@ -62,7 +66,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyInnerVectorPassThrough_Bench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.VECTOR_PASS_THROUGH); } @@ -70,7 +74,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyInnerNativeVectorOptimizedBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); } @@ -78,7 +82,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyInnerNativeVectorFastBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER, MapJoinTestImplementation.NATIVE_VECTOR_FAST); } 
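
[Reviewer note, not part of the patch: every @Setup in these benchmark classes gains Level.Invocation, which makes JMH re-run doSetup before each invocation of the measured method instead of once per trial, so every measurement starts from a freshly built join table; together with the new @Param rowCount fields in the bench base classes, table size becomes a sweepable benchmark axis, and the addProfiler(LinuxPerf*) calls wire JMH's Linux perf profilers into the runners. A minimal, self-contained sketch of the pattern, with hypothetical class and field names:

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;

@State(Scope.Thread)
public class InvocationSetupSketch {

  // JMH injects each @Param value before setup runs, as with the
  // rowCount fields this patch adds to the bench base classes.
  @Param("100000")
  int rowCount;

  long[] rows;

  // Level.Invocation re-runs this before every call to measure(); the JMH
  // javadoc warns it adds per-invocation timing overhead, the price paid
  // for measuring against an unwarmed data structure each time.
  @Setup(Level.Invocation)
  public void setup() {
    rows = new long[rowCount];
    for (int i = 0; i < rowCount; i++) {
      rows[i] = i % 97;  // stand-in for building the real join input
    }
  }

  @Benchmark
  public long measure() {
    long sum = 0;
    for (long v : rows) {
      sum += v;
    }
    return sum;
  }
}
]
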
@@ -88,7 +92,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyInnerBigOnlyRowModeHashMapBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_HASH_MAP); } @@ -96,7 +100,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyInnerBigOnlyRowModeOptimized_Bench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); } @@ -104,7 +108,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyInnerBigOnlyVectorPassThrough_Bench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.VECTOR_PASS_THROUGH); } @@ -112,7 +116,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyInnerBigOnlyNativeVectorOptimizedBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); } @@ -120,7 +124,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyInnerBigOnlyNativeVectorFastBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.INNER_BIG_ONLY, MapJoinTestImplementation.NATIVE_VECTOR_FAST); } @@ -130,7 +134,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyLeftSemiRowModeHashMapBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_HASH_MAP); } @@ -138,7 +142,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyLeftSemiRowModeOptimized_Bench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.ROW_MODE_OPTIMIZED); } @@ -146,7 +150,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyLeftSemiVectorPassThrough_Bench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.VECTOR_PASS_THROUGH); } @@ -154,7 +158,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyLeftSemiNativeVectorOptimizedBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); } @@ -162,7 +166,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyLeftSemiNativeVectorFastBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.LEFT_SEMI, MapJoinTestImplementation.NATIVE_VECTOR_FAST); } @@ -172,7 +176,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyOuterRowModeHashMapBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public 
void setup() throws Exception { doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.ROW_MODE_HASH_MAP); } @@ -188,7 +192,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyOuterVectorPassThrough_Bench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.VECTOR_PASS_THROUGH); } @@ -196,7 +200,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyOuterNativeVectorOptimizedBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED); } @@ -204,7 +208,7 @@ public void setup() throws Exception { public static class MapJoinOneLongKeyOuterNativeVectorFastBench extends MapJoinOneLongKeyBenchBase { - @Setup + @Setup(Level.Invocation) public void setup() throws Exception { doSetup(VectorMapJoinVariation.OUTER, MapJoinTestImplementation.NATIVE_VECTOR_FAST); } @@ -215,6 +219,9 @@ public void setup() throws Exception { public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() .include(".*" + MapJoinOneLongKeyBench.class.getSimpleName() + ".*") + .addProfiler(LinuxPerfProfiler.class) + .addProfiler(LinuxPerfNormProfiler.class) + .addProfiler(LinuxPerfAsmProfiler.class) .build(); new Runner(opt).run(); } diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java index 60b2890..80d3787 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java @@ -25,9 +25,13 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.openjdk.jmh.annotations.Param; public abstract class MapJoinOneLongKeyBenchBase extends AbstractMapJoin { - + + @Param("10000000") // 10,000,000 + protected int rowCount; + public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation) throws Exception { @@ -35,8 +39,6 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, long seed = 2543; - int rowCount = 10000000; // 10,000,000. 
- String[] bigTableColumnNames = new String[] {"number1"}; TypeInfo[] bigTableTypeInfos = new TypeInfo[] { diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java index 6a78a9f..c3307aa 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBench.java @@ -20,6 +20,9 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.openjdk.jmh.profile.LinuxPerfAsmProfiler; +import org.openjdk.jmh.profile.LinuxPerfNormProfiler; +import org.openjdk.jmh.profile.LinuxPerfProfiler; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; @@ -215,6 +218,9 @@ public void setup() throws Exception { public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder() .include(".*" + MapJoinOneStringKeyBench.class.getSimpleName() + ".*") + .addProfiler(LinuxPerfProfiler.class) + .addProfiler(LinuxPerfNormProfiler.class) + .addProfiler(LinuxPerfAsmProfiler.class) .build(); new Runner(opt).run(); } diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java index 937ede1..69a323a 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java @@ -25,9 +25,13 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.openjdk.jmh.annotations.Param; public abstract class MapJoinOneStringKeyBenchBase extends AbstractMapJoin { - + + @Param("100000") // 100,000 + protected int rowCount; + public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation) throws Exception { @@ -35,8 +39,6 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, long seed = 2543; - int rowCount = 100000; // 100,000. 
- String[] bigTableColumnNames = new String[] {"b1"}; TypeInfo[] bigTableTypeInfos = new TypeInfo[] { diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java index 1f87f8d..c581dff 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java @@ -38,6 +38,11 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate.CountAggregateKind; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -66,6 +71,12 @@ public class VectorGroupByOperatorBench extends AbstractOperatorBench { @Param({ + "original", + "native" + }) + private String implementation; + + @Param({ "true", "false" }) @@ -93,6 +104,7 @@ @Param({ "count", + // "countStar", "min", "max", "sum", @@ -109,6 +121,7 @@ @Param({ "bigint", + "date", "double", "string", "decimal(7,2)", // to use this via command line arg "decimal(7_2)" @@ -118,7 +131,7 @@ private String dataType; private Random rand = new Random(1234); - private VectorGroupByOperator vgo; + private Operator vgo; private VectorizedRowBatch vrb; private int size = VectorizedRowBatch.DEFAULT_SIZE; @@ -135,10 +148,62 @@ public void setup() { VectorizationContext ctx = new VectorizationContext("name", ImmutableList.of("A")); GroupByDesc desc = buildGroupByDescType(aggregation, evalMode, "A", typeInfo, processMode); Operator groupByOp = OperatorFactory.get(new CompilationOpContext(), desc); - VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc(); + VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) desc.getVectorDesc(); vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH); - vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc); - vgo.initialize(new Configuration(), null); + if (implementation == null || implementation.equalsIgnoreCase("original")) { + vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc); + vgo.initialize(new Configuration(), null); + } else if (implementation.equalsIgnoreCase("native")) { + if (!aggregation.equalsIgnoreCase("count")) { + System.out.println("Only aggregation count with String supported"); + System.exit(0); + } + VectorGroupByInfo vectorGroupByInfo = new VectorGroupByInfo(); + vectorGroupByInfo.setAggregationVariation(AggregationVariation.HASH_COUNT); + final CountAggregateKind countAggregateKind; + // if (desc.getAggregators().get(0).getParameters().size() == 0) { + // countAggregateKind = CountAggregateKind.COUNT_STAR; + // } else { + countAggregateKind = CountAggregateKind.COUNT_KEY; + // } + vectorGroupByInfo.setCountAggregate( + new 
CountAggregate(countAggregateKind));
+
+        final HashTableKeyType hashTableKeyType;
+        switch (dataType) {
+        case "bigint":
+        case "date":
+          hashTableKeyType = HashTableKeyType.LONG;
+          break;
+        case "string":
+          hashTableKeyType = HashTableKeyType.STRING;
+          break;
+        default:
+          hashTableKeyType = HashTableKeyType.SINGLE_KEY;
+          break;
+        }
+        vectorGroupByInfo.setHashTableKeyType(hashTableKeyType);
+        vectorGroupByInfo.setTestGroupByMaxMemoryAvailable(20000000);
+
+        vectorGroupByDesc.setVectorGroupByInfo(vectorGroupByInfo);
+
+        String issue =
+            Vectorizer.doVectorizeGroupByOperatorPreparation(
+                groupByOp, ctx, vectorGroupByDesc);
+        if (issue != null) {
+          System.out.println(issue);
+          System.exit(0);
+        }
+        vgo =
+            Vectorizer.specializeGroupByOperator(
+                groupByOp, ctx, (GroupByDesc) groupByOp.getConf(), vectorGroupByDesc);
+        vgo.initialize(new Configuration(), null);
+      } else {
+        System.out.println("Unknown implementation " + implementation);
+        System.exit(0);
+      }
+      System.out.println("implementation class " + vgo.getClass().getSimpleName());
+
     } catch (Exception e) {
       // likely unsupported combination of params
       // https://bugs.openjdk.java.net/browse/CODETOOLS-7901296 is not available yet to skip benchmark cleanly
@@ -162,6 +227,11 @@ private GroupByDesc buildGroupByDescType(
     outputColumnNames.add("_col0");

     GroupByDesc desc = new GroupByDesc();
+    ArrayList keys = new ArrayList();
+    keys.add(
+        new ExprNodeColumnDesc(
+            dataType, "A", "table", false));
+    desc.setKeys(keys);
     desc.setVectorDesc(new VectorGroupByDesc());
     desc.setOutputColumnNames(outputColumnNames);
@@ -191,6 +261,9 @@ private AggregationDesc buildAggregationDesc(
       GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator =
           (GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator();
       udafBloomFilterEvaluator.setHintEntries(10000);
+    } else if (aggregate.equals("countStar")) {
+      aggregate = "count";
+      params = new ArrayList();
     }
     agg.setGenericUDAFName(aggregate);
     agg.setMode(mode);
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 3672c7a..04daa98 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -279,7 +279,9 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_groupby4.q,\
   vector_groupby6.q,\
   vector_groupby_3.q,\
+  vector_groupby_singlekey.q,\
   vector_groupby_mapjoin.q,\
+  vector_groupby_multikey.q,\
   vector_groupby_reduce.q,\
   vector_grouping_sets.q,\
   vector_if_expr.q,\
diff --git ql/pom.xml ql/pom.xml
index d73deba..8ccd09d 100644
--- ql/pom.xml
+++ ql/pom.xml
@@ -865,6 +865,7 @@
                    classpath="${compile.classpath}"/>
+
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCommonLines.txt
new file mode 100644
index 0000000..18bc868
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCommonLines.txt
@@ -0,0 +1,291 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) Avoid the cost of calling methods or having abstract objects
+#COMMENT 3) Avoid having to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to any operator variation. +#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any key variation specific transient variables. +#COMMENT +#BEGIN_LINES COMMON_KEY_VARIATION_TRANSIENT +#IF SINGLE_KEY||MULTI_KEY + // Object that can take the column(s) in row in a vectorized row batch and serialized it. + // The key is not NULL. + private transient VectorSerializeRow keyVectorSerializeWrite; + + // The BinarySortable serialization of the current key. + private transient Output currentKeyOutput; + + // The BinarySortable serialization of the next key for a possible series of equal keys. + private transient Output nextKeyOutput; + +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific Operator import code lines. +#COMMENT +#BEGIN_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS +#IF STRING_KEY +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Helpful variables for accessing the key values for the LONG and STRING variations. +#COMMENT (None needed for SINGLE_KEY or MULTI_KEY) +#COMMENT +#BEGIN_LINES COMMON_KEY_VECTOR_VARIABLES +#IF LONG_KEY + long[] keyVector = keyColVector.vector; +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[][] keyVector = keyColVector.vector; + final int[] keyStart = keyColVector.start; + final int[] keyLength = keyColVector.length; +#ENDIF STRING_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for logical (i.e. selectedInUse) and the batch's keys have no +#COMMENT NULLs case. All variations. +#COMMENT +#BEGIN_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES + final int firstBatchIndex = selected[0]; +#IF LONG_KEY + long currentKey = keyVector[firstBatchIndex]; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey = keyVector[firstBatchIndex]; + int currentKeyStart = keyStart[firstBatchIndex]; + int currentKeyLength = keyLength[firstBatchIndex]; +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex); + byte[] currentKey = currentKeyOutput.getData(); + int currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SINGLE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for logical (i.e. selectedInUse) and the batch's keys may have +#COMMENT NULLs case. All variations. 
+#COMMENT +#BEGIN_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES + boolean[] keyColIsNull = keyColVector.isNull; + boolean currKeyIsNull; + +#IF LONG_KEY + long currentKey; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey; + int currentKeyStart; + int currentKeyLength; +#ENDIF STRING_KEY +#IF SINGLE_KEY + byte[] currentKey; + int currentKeyLength; +#ENDIF SINGLE_KEY + final int firstBatchIndex = selected[0]; + if (keyColIsNull[firstBatchIndex]) { + currKeyIsNull = true; +#IF LONG_KEY + currentKey = 0; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = null; + currentKeyStart = 0; + currentKeyLength = 0; +#ENDIF STRING_KEY +#IF SINGLE_KEY + currentKey = null; + currentKeyLength = 0; +#ENDIF SINGLE_KEY + } else { + currKeyIsNull = false; +#IF LONG_KEY + currentKey = keyVector[firstBatchIndex]; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = keyVector[firstBatchIndex]; + currentKeyStart = keyStart[firstBatchIndex]; + currentKeyLength = keyLength[firstBatchIndex]; +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex); + currentKey = currentKeyOutput.getData(); + currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SINGLE_KEY + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for physical (i.e. NOT selectedInUse) and the batch's keys have no +#COMMENT NULLs case. All variations. +#COMMENT +#BEGIN_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#IF LONG_KEY + long currentKey = keyVector[0]; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey = keyVector[0]; + int currentKeyStart = keyStart[0]; + int currentKeyLength = keyLength[0]; +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] currentKey = currentKeyOutput.getData(); + int currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SINGLE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for physical (i.e. NOT selectedInUse) and the batch's keys may have +#COMMENT NULLs case. All variations. 
+#COMMENT +#BEGIN_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES + boolean[] keyColIsNull = keyColVector.isNull; + boolean currKeyIsNull; + +#IF LONG_KEY + long currentKey; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey; + int currentKeyStart; + int currentKeyLength; +#ENDIF STRING_KEY +#IF SINGLE_KEY + byte[] currentKey; + int currentKeyLength; +#ENDIF SINGLE_KEY + if (keyColIsNull[0]) { + currKeyIsNull = true; +#IF LONG_KEY + currentKey = 0; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = null; + currentKeyStart = 0; + currentKeyLength = 0; +#ENDIF STRING_KEY +#IF SINGLE_KEY + currentKey = null; + currentKeyLength = 0; +#ENDIF SINGLE_KEY + } else { + currKeyIsNull = false; +#IF LONG_KEY + currentKey = keyVector[0]; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = keyVector[0]; + currentKeyStart = keyStart[0]; + currentKeyLength = keyLength[0]; +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + currentKey = currentKeyOutput.getData(); + currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SINGLE_KEY + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Get next key value at batchIndex. +#COMMENT All variations. +#COMMENT +#BEGIN_LINES COMMON_GET_NEXT_KEY +#IF LONG_KEY + final long nextKey = keyVector[batchIndex]; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] nextKey = keyVector[batchIndex]; + final int nextKeyStart = keyStart[batchIndex]; + final int nextKeyLength = keyLength[batchIndex]; +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(nextKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + final byte[] nextKey = nextKeyOutput.getData(); + final int nextKeyLength = nextKeyOutput.getLength(); +#ENDIF SINGLE_KEY +#IF MULTI_KEY + Not Applicable -- see MULTI_KEY_GET_NEXT_KEY instead. +#ENDIF MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT IF statement next key value equals current key value for all variations. +#COMMENT +#BEGIN_LINES COMMON_IF_NEXT_EQUALS_CURRENT +#IF LONG_KEY + if (currentKey == nextKey) { +#ENDIF LONG_KEY +#IF STRING_KEY + if (StringExpr.equal( + currentKey, currentKeyStart, currentKeyLength, + nextKey, nextKeyStart, nextKeyLength)) { +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + if (StringExpr.equal( + currentKey, 0, currentKeyLength, + nextKey, 0, nextKeyLength)) { +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT ELSE IF statement next key value equals current key value for all variations. +#COMMENT +#BEGIN_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +#IF LONG_KEY + } else if (currentKey == nextKey) { +#ENDIF LONG_KEY +#IF STRING_KEY + } else if (StringExpr.equal( + currentKey, currentKeyStart, currentKeyLength, + nextKey, nextKeyStart, nextKeyLength)) { +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + } else if (StringExpr.equal( + currentKey, 0, currentKeyLength, + nextKey, 0, nextKeyLength)) { +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Remember the next key value as the current key value. All variations. 
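+#COMMENT Reader note, added in review (not part of the original file): for the
+#COMMENT SINGLE_KEY and MULTI_KEY variations the lines below avoid copying the
+#COMMENT serialized key bytes by swapping the two reusable Output buffers, so
+#COMMENT "current" and "next" simply trade roles whenever a new key run starts:
+#COMMENT
+#COMMENT   Output temp = nextKeyOutput;        // O(1) reference swap, no byte copy
+#COMMENT   nextKeyOutput = currentKeyOutput;
+#COMMENT   currentKeyOutput = temp;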
+#COMMENT
+#BEGIN_LINES COMMON_NEW_CURRENT_KEY
+      currentKey = nextKey;
+#IF STRING_KEY
+      currentKeyStart = nextKeyStart;
+      currentKeyLength = nextKeyLength;
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+      currentKeyLength = nextKeyLength;
+      final Output tempOutput = nextKeyOutput;
+      nextKeyOutput = currentKeyOutput;
+      currentKeyOutput = tempOutput;
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountColumnTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountColumnTableLines.txt
new file mode 100644
index 0000000..7eddb23
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountColumnTableLines.txt
@@ -0,0 +1,67 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) Avoid the cost of calling methods or having abstract objects
+#COMMENT 3) Avoid having to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to Single COUNT(non-key-column) aggregations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- find or create the hash table entry and
+#COMMENT add or initialize it with the count. All variations.
+#COMMENT
+#BEGIN_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY
+#IF LONG_KEY
+      findOrCreateLongKeyZeroCount(
+          currentKey,
+          HashCodeUtil.calculateLongHashCode(currentKey),
+          count);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      findOrCreateBytesKeyCount(
+          currentKey, currentKeyStart, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, currentKeyStart, currentKeyLength),
+          count);
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+      findOrCreateBytesKeyCount(
+          currentKey, 0, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, 0, currentKeyLength),
+          count);
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- create the hash table entry if necessary;
+#COMMENT ignore it if it is already present, since the count is 0 in this case. All variations.
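+#COMMENT
+#COMMENT Reader note, added in review (not part of the original file): the
+#COMMENT findOrCreate* helpers used in this file live in the generated operator,
+#COMMENT not in this snippet file. A hypothetical sketch of the contract they are
+#COMMENT expected to satisfy, with invented helper names:
+#COMMENT
+#COMMENT   void findOrCreateLongKeyZeroCount(long key, long hashCode, long count) {
+#COMMENT     int slot = probe(key, hashCode);      // hypothetical hash table probe
+#COMMENT     if (slotIsEmpty(slot)) {
+#COMMENT       storeKey(slot, key);                // new entry: initialize the count
+#COMMENT       storeCount(slot, count);
+#COMMENT     } else {
+#COMMENT       addToCount(slot, count);            // existing entry: accumulate
+#COMMENT     }
+#COMMENT   }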
+#COMMENT
+#BEGIN_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS
+#IF LONG_KEY
+      findOrCreateLongKeyZeroCount(
+          currentKey,
+          HashCodeUtil.calculateLongHashCode(currentKey),
+          0);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      findOrCreateBytesKeyCount(
+          currentKey, currentKeyStart, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, currentKeyStart, currentKeyLength),
+          0);
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+      findOrCreateBytesKeyCount(
+          currentKey, 0, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, 0, currentKeyLength),
+          0);
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountKeyTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountKeyTableLines.txt
new file mode 100644
index 0000000..2f5915e
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountKeyTableLines.txt
@@ -0,0 +1,40 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) Avoid the cost of calling methods or having abstract objects
+#COMMENT 3) Avoid having to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to Single COUNT(key-column) aggregation.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- find or create the hash table entry and
+#COMMENT add or initialize it with the count. All variations.
+#COMMENT
+#BEGIN_LINES COUNT_KEY_FIND_OR_CREATE_KEY
+#IF LONG_KEY
+      findOrCreateLongKeyNonZeroCount(
+          currentKey,
+          HashCodeUtil.calculateLongHashCode(currentKey),
+          count);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      findOrCreateBytesKeyCount(
+          currentKey, currentKeyStart, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, currentKeyStart, currentKeyLength),
+          count);
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+      findOrCreateBytesKeyCount(
+          currentKey, 0, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, 0, currentKeyLength),
+          count);
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountStarTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountStarTableLines.txt
new file mode 100644
index 0000000..127756f
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCountStarTableLines.txt
@@ -0,0 +1,40 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) Avoid the cost of calling methods or having abstract objects
+#COMMENT 3) Avoid having to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to Single COUNT(*) aggregation.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- find or create the hash table entry and
+#COMMENT add or initialize it with the count. All variations.
+#COMMENT
+#BEGIN_LINES COUNT_STAR_FIND_OR_CREATE_KEY
+#IF LONG_KEY
+      findOrCreateLongKeyNonZeroCount(
+          currentKey,
+          HashCodeUtil.calculateLongHashCode(currentKey),
+          count);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      findOrCreateBytesKeyCount(
+          currentKey, currentKeyStart, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, currentKeyStart, currentKeyLength),
+          count);
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+      findOrCreateBytesKeyCount(
+          currentKey, 0, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, 0, currentKeyLength),
+          count);
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDuplicateReductionTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDuplicateReductionTableLines.txt
new file mode 100644
index 0000000..94ab2d0
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDuplicateReductionTableLines.txt
@@ -0,0 +1,41 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) Avoid the cost of calling methods or having abstract objects
+#COMMENT 3) Avoid having to parameterize methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to Duplicate Reduction operator variations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- find or create the hash table entry.
+#COMMENT All variations.
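+#COMMENT
+#COMMENT Reader note, added in review (not part of the original file): in the
+#COMMENT LONG_KEY lines below, key value 0 is diverted to a haveZeroKey flag rather
+#COMMENT than inserted into the hash table. A plausible reason, inferred here
+#COMMENT rather than stated by this file, is that the long hash table uses 0 as
+#COMMENT its empty-slot marker:
+#COMMENT
+#COMMENT   boolean slotIsEmpty(long[] slots, int slot) {   // hypothetical sketch
+#COMMENT     return slots[slot] == 0;  // 0 doubles as "empty", so key 0 needs a side flag
+#COMMENT   }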
+#COMMENT
+#BEGIN_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY
+#IF LONG_KEY
+          if (currentKey == 0) {
+            haveZeroKey = true;
+          } else {
+            createOrIgnoreLongDuplicateReductionKey(
+                currentKey,
+                HashCodeUtil.calculateLongHashCode(currentKey));
+          }
+#ENDIF LONG_KEY
+#IF STRING_KEY
+          createOrIgnoreBytesDuplicateReductionKey(
+              currentKey, currentKeyStart, currentKeyLength,
+              HashCodeUtil.calculateBytesHashCode(
+                  currentKey, currentKeyStart, currentKeyLength));
+#ENDIF STRING_KEY
+#IF SINGLE_KEY||MULTI_KEY
+          createOrIgnoreBytesDuplicateReductionKey(
+              currentKey, 0, currentKeyLength,
+              HashCodeUtil.calculateBytesHashCode(
+                  currentKey, 0, currentKeyLength));
+#ENDIF SINGLE_KEY||MULTI_KEY
+#END_LINES
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCommonLines.txt
new file mode 100644
index 0000000..17e50da
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCommonLines.txt
@@ -0,0 +1,108 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT   1) Reduce code duplication
+#COMMENT   2) To not incur the cost of calling methods or having abstract objects
+#COMMENT   3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT   4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to MULTI_KEY variations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT   Any multi-key variation specific initializeOp code lines.
+#COMMENT
+#BEGIN_LINES MULTI_KEY_VARIATION_INITIALIZE_OP
+
+    final int size = groupByKeyExpressions.length;
+    keyVectorSerializeWrite =
+        new VectorSerializeRow(
+            new BinarySortableSerializeWrite(size));
+
+    TypeInfo[] typeInfos = new TypeInfo[size];
+    int[] columnMap = new int[size];
+    for (int i = 0; i < size; i++) {
+      VectorExpression keyExpr = groupByKeyExpressions[i];
+      typeInfos[i] = keyExpr.getOutputTypeInfo();
+      columnMap[i] = keyExpr.getOutputColumnNum();
+    }
+    keyVectorSerializeWrite.init(typeInfos, columnMap);
+
+    currentKeyOutput = new Output();
+    nextKeyOutput = new Output();
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT   Any multi-key variation specific next key code lines.
+#COMMENT
+#BEGIN_LINES MULTI_KEY_GET_NEXT_KEY
+      final boolean nextKeyIsNull;
+      final byte[] nextKey;
+      final int nextKeyLength;
+      keyVectorSerializeWrite.setOutput(nextKeyOutput);
+      keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
+      if (keyVectorSerializeWrite.getIsAllNulls()) {
+        nextKeyIsNull = true;
+        nextKey = null;
+        nextKeyLength = 0;
+
+        // We note we encountered a NULL key.
+        haveNullKey = true;
+      } else {
+        nextKeyIsNull = false;
+        nextKey = nextKeyOutput.getData();
+        nextKeyLength = nextKeyOutput.getLength();
+      }
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT   Logical current key values for multi-key.
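The MULTI_KEY blocks above rest on one trick: all GROUP BY key columns of a row are serialized into a single byte array (via VectorSerializeRow over BinarySortableSerializeWrite), so multi-column key equality and hashing reduce to byte-array equality and hashing. A toy illustration of the idea follows; the DataOutputStream encoding and the two-column row shape are made-up stand-ins, not Hive's actual binary-sortable format.

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.util.Arrays;

    // Toy multi-key serialization: collapse several key columns into one byte[].
    public class MultiKeyBytesSketch {

      // Hypothetical row layout: one long key column and one String key column.
      static byte[] serializeKey(long col0, String col1) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        out.writeLong(col0);
        out.writeUTF(col1);
        return bytes.toByteArray();
      }

      public static void main(String[] args) throws IOException {
        byte[] currentKey = serializeKey(7L, "a");
        byte[] nextKey = serializeKey(7L, "a");

        // Composite-key equality becomes byte equality; the generated code likewise
        // compares the serialized current and next key buffers.
        System.out.println(Arrays.equals(currentKey, nextKey));   // true

        // Hashing likewise operates on the bytes. Hive uses HashCodeUtil's
        // calculateBytesHashCode; Arrays.hashCode is just a stand-in here.
        System.out.println(Arrays.hashCode(currentKey) == Arrays.hashCode(nextKey));  // true
      }
    }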
+#COMMENT
+#BEGIN_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+    boolean currKeyIsNull;
+    byte[] currentKey;
+    int currentKeyLength;
+
+    final int firstBatchIndex = selected[0];
+    keyVectorSerializeWrite.setOutput(currentKeyOutput);
+    keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex);
+    if (keyVectorSerializeWrite.getIsAllNulls()) {
+      currKeyIsNull = true;
+      currentKey = null;
+      currentKeyLength = 0;
+
+      // We note we encountered a NULL key.
+      haveNullKey = true;
+    } else {
+      currKeyIsNull = false;
+      currentKey = currentKeyOutput.getData();
+      currentKeyLength = currentKeyOutput.getLength();
+    }
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT   Physical current key values for multi-key.
+#COMMENT
+#BEGIN_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+    boolean currKeyIsNull;
+    byte[] currentKey;
+    int currentKeyLength;
+
+    keyVectorSerializeWrite.setOutput(currentKeyOutput);
+    keyVectorSerializeWrite.serializeWrite(batch, 0);
+    if (keyVectorSerializeWrite.getIsAllNulls()) {
+      currKeyIsNull = true;
+      currentKey = null;
+      currentKeyLength = 0;
+
+      // We note we encountered a NULL key.
+      haveNullKey = true;
+    } else {
+      currKeyIsNull = false;
+      currentKey = currentKeyOutput.getData();
+      currentKeyLength = currentKeyOutput.getLength();
+    }
+#END_LINES
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnInclude.txt
new file mode 100644
index 0000000..e29ba46
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnInclude.txt
@@ -0,0 +1,312 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeyCountColumnOperator.
+#COMMENT
+#COMMENT
+  /*
+   * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleNullsKey.
+   *
+   * (For remaining comments see doNoNullsKeyNoNullsColumn).
+   */
+  private void doNoNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+    int count = 1;
+
+    // Start counting after first no NULL key.
+#IF LOGICAL_BATCH_PROCESSING
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES MULTI_KEY_GET_NEXT_KEY
+
+      if (nextKeyIsNull) {
+
+        if (currKeyIsNull) {
+
+          count++;
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY +4
+
+          // New NULL key.
+          currKeyIsNull = true;
+          count = 1;
+        }
+
+      } else {
+
+        if (currKeyIsNull) {
+
+          // Current NULL key ended.
+          currKeyIsNull = false;
+
+          haveNullKey = true;
+          nullKeyCount += count;
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+
+          count = 1;
+#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT
+
+          count++;
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY +4
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyRepeatingNullColumn). + */ + private void doRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + // Start counting after first key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + // No counting. + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + + // New NULL key. + currKeyIsNull = true; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + // No counting + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + } + } + + /* + * Do the non-key-column NO REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyNullsColumn). + */ + private void doNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, ColumnVector nonKeyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + int count = (nonKeyIsNull[firstBatchIndex] ? 0 : 1); + + // Start counting after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + int count = (nonKeyIsNull[0] ? 0 : 1); + + // Start counting after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY +4 + + // New NULL key. + currKeyIsNull = true; + count = (nonKeyIsNull[batchIndex] ? 
0 : 1); + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY +4 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + } + + /* + * batch processing for NULLS key case. + * + * Both NULL and non-NULL keys will have counts for non-key-columns. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any processing for the non-key-column counting. + * + * (See the non-key column case comments for handleNoNullsKey). + * + * In all cases above, when its a NULL key, do NULL entry processing. + * + */ + private void handle(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doNoNullsColumn(batch, inputLogicalSize); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doRepeatingNullColumn(batch, inputLogicalSize); + + } else { + + // Non-NULL repeating non-key column. + doNoNullsColumn(batch, inputLogicalSize); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + + doNullsColumn(batch, inputLogicalSize, nonKeyColVector); + + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnOperator.txt new file mode 100644 index 0000000..f55b3da --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountColumnOperator.txt @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.count.VectorGroupByHashMultiKeyCountTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hive.common.util.HashCodeUtil;
+
+#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS
+
+/*
+ * Specialized class for doing a multi-key COUNT(non-key-column) Native Vectorized GroupBy.
+ *
+ * (For more comments, see GroupByHashSingleKeyCountColumnInclude.txt).
+ */
+public class VectorGroupByHashMultiKeyCountColumnOperator
+    extends VectorGroupByHashMultiKeyCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int countColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient boolean haveNullKey;
+
+  protected transient long nullKeyCount;
+
+#USE_LINES COMMON_KEY_VARIATION_TRANSIENT
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashMultiKeyCountColumnOperator() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeyCountColumnOperator(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    countColumnNum = countAggregate.getCountColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+    nullKeyCount = 0;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT   1) Reduce code duplication
+#COMMENT   2) To not incur the cost of calling methods or having abstract objects
+#COMMENT   3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT   4) Separate the key variation variables and logic from the common loop logic.
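Each generated operator's doMainLoop (seen just below) dispatches on batch.selectedInUse: the include files are expanded twice, once iterating indirectly through selected[] (the "logical" expansion) and once scanning rows 0..size-1 directly (the "physical" expansion). A simplified sketch of the two loop shapes over a toy batch model; this Batch type is not Hive's VectorizedRowBatch, just enough structure to show the split.

    // Standalone sketch of logical vs. physical batch iteration.
    public class BatchIterationSketch {

      static class Batch {
        long[] keyColumn;
        int[] selected;          // valid row indices when selectedInUse
        boolean selectedInUse;
        int size;                // logical row count
      }

      static long sumKeys(Batch batch) {
        long sum = 0;
        if (batch.selectedInUse) {
          // Logical iteration: the loop shape the templates emit when
          // LOGICAL_BATCH_PROCESSING is true.
          for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) {
            final int batchIndex = batch.selected[logicalIndex];
            sum += batch.keyColumn[batchIndex];
          }
        } else {
          // Physical iteration: the LOGICAL_BATCH_PROCESSING=false expansion.
          for (int batchIndex = 0; batchIndex < batch.size; batchIndex++) {
            sum += batch.keyColumn[batchIndex];
          }
        }
        return sum;
      }

      public static void main(String[] args) {
        Batch batch = new Batch();
        batch.keyColumn = new long[] {10, 20, 30, 40};
        batch.selected = new int[] {1, 3};   // only rows 1 and 3 survived filtering
        batch.selectedInUse = true;
        batch.size = 2;
        System.out.println(sumKeys(batch));  // 60
      }
    }

Generating both expansions instead of branching per row keeps the hot loop free of the selected[] indirection when no rows have been filtered out.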
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashMultiKeyCommonLines
+#INCLUDE GroupByHashCountColumnTableLines
+
+#INCLUDE GroupByHashMultiKeyCountColumnInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical"
+
+#INCLUDE GroupByHashMultiKeyCountColumnInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical"
+
+  @Override
+  protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    if (batch.selectedInUse) {
+      handleLogical(batch, inputLogicalSize);
+    } else {
+      handlePhysical(batch, inputLogicalSize);
+    }
+  }
+
+  /**
+   * Flush all of the key and count pairs of the multi-key hash table to the
+   * output.
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+      outputCountForNullMultiKey(nullKeyCount);
+    }
+
+    doOutputMultiKeyAndCounts();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyInclude.txt
new file mode 100644
index 0000000..7f1e9e4
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyInclude.txt
@@ -0,0 +1,98 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeyCountKeyOperator.
+#COMMENT
+#COMMENT
+  /*
+   * batch processing (i.e. selectedInUse is true since rows were filtered out) for
+   * NULLS key case.
+   *
+   * For all NULL key cases we note a NULL key exists but leave its count as 0.
+   *
+   * Do find/create on each non-NULL key with its accumulated count.
+   */
+  private void handle(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+    int count;
+    if (currKeyIsNull) {
+      count = 0;
+
+      // We note we encountered a NULL key.  But there will be no count for it -- just NULL.
+      haveNullKey = true;
+    } else {
+      count = 1;
+    }
+
+#IF LOGICAL_BATCH_PROCESSING
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES MULTI_KEY_GET_NEXT_KEY
+
+      if (nextKeyIsNull) {
+
+        if (currKeyIsNull) {
+
+          // We don't count NULLs for NULL key.
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY +2
+
+          // New NULL key.
+          currKeyIsNull = true;
+          count = 0;
+
+          // We note we encountered a NULL key.  But there will be no count for it -- just NULL.
+          haveNullKey = true;
+        }
+
+      } else {
+
+        if (currKeyIsNull) {
+
+          // Current NULL key ended.  We don't count NULLs for NULL key.
+          currKeyIsNull = false;
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY +2
+
+          count = 1;
+#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2
+
+          count++;
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY +2
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY +2
+
+          count = 1;
+        }
+      }
+    }
+
+    // Handle last key.
+ if (!currKeyIsNull) { +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyOperator.txt new file mode 100644 index 0000000..7ae99ae --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountKeyOperator.txt @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.count.VectorGroupByHashMultiKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a multi-key COUNT(key-column) Native Vectorized GroupBy. + * + * (For more comments see GroupByHashSingleKeyCountKeyOperator.txt). + */ +public class VectorGroupByHashMultiKeyCountKeyOperator + extends VectorGroupByHashMultiKeyCountTable { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+  //
+
+  public VectorGroupByHashMultiKeyCountKeyOperator() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeyCountKeyOperator(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT   1) Reduce code duplication
+#COMMENT   2) To not incur the cost of calling methods or having abstract objects
+#COMMENT   3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT   4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashMultiKeyCommonLines
+#INCLUDE GroupByHashCountKeyTableLines
+
+#INCLUDE GroupByHashMultiKeyCountKeyInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical"
+
+#INCLUDE GroupByHashMultiKeyCountKeyInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical"
+
+  @Override
+  protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    if (batch.selectedInUse) {
+      handleLogical(batch, inputLogicalSize);
+    } else {
+      handlePhysical(batch, inputLogicalSize);
+    }
+  }
+
+  /**
+   * Flush all of the key and count pairs of the multi-key hash table to the
+   * output.
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+      outputCountForNullMultiKey(/* nullKeyCount */ 0);
+    }
+
+    doOutputMultiKeyAndCounts();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarInclude.txt
new file mode 100644
index 0000000..d78f76d
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarInclude.txt
@@ -0,0 +1,93 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeyCountStarOperator.
+#COMMENT
+#COMMENT
+  /*
+   * batch processing (i.e. selectedInUse is true since rows were filtered out) for
+   * NULLS key case.
+   *
+   * For all NULL keys we note a NULL key exists AND accumulate its count.
+   *
+   * Do find/create on each non-NULL key with its accumulated count.
+ */ + private void handle(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY +2 + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY +2 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarOperator.txt new file mode 100644 index 0000000..9b5a625 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCountStarOperator.txt @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.count.VectorGroupByHashMultiKeyCountTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hive.common.util.HashCodeUtil;
+
+#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS
+
+/*
+ * Specialized class for doing a multi-key COUNT(*) Native Vectorized GroupBy that looks up
+ * the serialized multi-key bytes in a specialized hash map.
+ *
+ * (For more comments see GroupByHashSingleKeyCountStarOperator.txt).
+ */
+public class VectorGroupByHashMultiKeyCountStarOperator
+    extends VectorGroupByHashMultiKeyCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int countColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient boolean haveNullKey;
+
+  protected transient long nullKeyCount;
+
+#USE_LINES COMMON_KEY_VARIATION_TRANSIENT
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashMultiKeyCountStarOperator() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeyCountStarOperator(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    countColumnNum = countAggregate.getCountColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+    nullKeyCount = 0;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT   1) Reduce code duplication
+#COMMENT   2) To not incur the cost of calling methods or having abstract objects
+#COMMENT   3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT   4) Separate the key variation variables and logic from the common loop logic.
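The COUNT(*) includes expand to a single-pass, run-length style loop: runs of equal keys are counted, finished non-NULL runs are merged into the hash table, and NULL-key rows accumulate in a side nullKeyCount, since a NULL group still gets a COUNT(*) value. A standalone sketch of that loop over a boxed, nullable key array; the names are invented and a plain HashMap stands in for the specialized table.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Objects;

    // Standalone sketch of the COUNT(*) series loop.
    public class CountStarSeriesSketch {

      public static void main(String[] args) {
        Long[] keys = {5L, 5L, null, null, 5L, 9L};   // boxed Long models a nullable key column

        Map<Long, Long> table = new HashMap<>();
        boolean haveNullKey = false;
        long nullKeyCount = 0;

        Long currentKey = keys[0];
        long count = 1;
        for (int i = 1; i < keys.length; i++) {
          if (Objects.equals(keys[i], currentKey)) {
            count++;                        // series of equal (or equally NULL) keys
            continue;
          }
          // Current series ended: flush it.
          if (currentKey == null) {
            haveNullKey = true;
            nullKeyCount += count;          // NULL rows are counted for COUNT(*)
          } else {
            table.merge(currentKey, count, Long::sum);
          }
          currentKey = keys[i];
          count = 1;
        }
        // Handle last series.
        if (currentKey == null) {
          haveNullKey = true;
          nullKeyCount += count;
        } else {
          table.merge(currentKey, count, Long::sum);
        }

        System.out.println(table);                            // {5=3, 9=1}
        System.out.println(haveNullKey + " " + nullKeyCount); // true 2
      }
    }

This also shows why the COUNT(key-column) variant differs only in its NULL handling: there, NULL-key rows set haveNullKey but keep the count at 0, since COUNT(key) over a NULL key is 0.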
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashMultiKeyCommonLines
+#INCLUDE GroupByHashCountStarTableLines
+
+#INCLUDE GroupByHashMultiKeyCountStarInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical"
+
+#INCLUDE GroupByHashMultiKeyCountStarInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical"
+
+  @Override
+  protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    if (batch.selectedInUse) {
+      handleLogical(batch, inputLogicalSize);
+    } else {
+      handlePhysical(batch, inputLogicalSize);
+    }
+  }
+
+  /**
+   * Flush all of the key and count pairs of the multi-key hash table to the
+   * output.
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+      outputCountForNullMultiKey(nullKeyCount);
+    }
+
+    doOutputMultiKeyAndCounts();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionInclude.txt
new file mode 100644
index 0000000..e3e8c61
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionInclude.txt
@@ -0,0 +1,70 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeyDuplicateReductionOperator.
+#COMMENT
+#COMMENT
+  protected void handle(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#IF LOGICAL_BATCH_PROCESSING
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES MULTI_KEY_GET_NEXT_KEY
+
+      if (nextKeyIsNull) {
+
+        if (currKeyIsNull) {
+
+          // NULL key series.
+        } else {
+
+          // Current non-NULL key ended by NULL key.
+#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY +2
+
+          // New NULL key.
+          currKeyIsNull = true;
+        }
+
+      } else {
+
+        if (currKeyIsNull) {
+
+          // Current NULL key ended.
+          currKeyIsNull = false;
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY +2
+#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2
+
+          // Equal key series.
+        } else {
+
+          // Current non-NULL key ended by another non-NULL key.
+#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY +2
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY +2
+        }
+      }
+    }
+
+    // Handle last key.
+    if (!currKeyIsNull) {
+#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY
+    }
+  }
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionOperator.txt
new file mode 100644
index 0000000..dc4a138
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionOperator.txt
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.duplicatereduction.VectorGroupByHashMultiKeyDuplicateReductionTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hive.common.util.HashCodeUtil;
+
+#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS
+
+/*
+ * Specialized class for doing a multi-key Native Vectorized GroupBy with no aggregation.
+ *
+ * (For more comments, see GroupByHashSingleKeyDuplicateReductionOperator.txt).
+ */
+public class VectorGroupByHashMultiKeyDuplicateReductionOperator
+    extends VectorGroupByHashMultiKeyDuplicateReductionTable {
+
+  private static final long serialVersionUID = 1L;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient boolean haveNullKey;
+
+#USE_LINES COMMON_KEY_VARIATION_TRANSIENT
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashMultiKeyDuplicateReductionOperator() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeyDuplicateReductionOperator(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT   1) Reduce code duplication
+#COMMENT   2) To not incur the cost of calling methods or having abstract objects
+#COMMENT   3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT   4) Separate the key variation variables and logic from the common loop logic.
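Unlike the counting operators, the duplicate-reduction operator's NULL group carries no count: outputGroupBy just below emits it as a single all-NULL key row in the output batch, flushing the batch first if it is already full. A toy model of that emission pattern follows; Column, Batch, and forward() are simplified stand-ins for Hive's ColumnVector, VectorizedRowBatch, and operator forwarding.

    // Standalone sketch of appending one all-NULL key row to an output batch.
    public class NullRowOutputSketch {

      static final int DEFAULT_SIZE = 1024;   // VectorizedRowBatch's default capacity

      static class Column {
        boolean[] isNull = new boolean[DEFAULT_SIZE];
        boolean noNulls = true;
      }

      static class Batch {
        Column[] cols;
        int size;
        Batch(int numCols) {
          cols = new Column[numCols];
          for (int i = 0; i < numCols; i++) {
            cols[i] = new Column();
          }
        }
      }

      static void forward(Batch batch) {
        System.out.println("forwarded " + batch.size + " rows");  // simulate downstream send
        batch.size = 0;
      }

      static void appendNullKeyRow(Batch outputBatch) {
        if (outputBatch.size == DEFAULT_SIZE) {
          forward(outputBatch);              // make room first, as outputGroupBy does
        }
        final int nullBatchIndex = outputBatch.size;
        for (Column keyColumn : outputBatch.cols) {
          keyColumn.isNull[nullBatchIndex] = true;
          keyColumn.noNulls = false;         // batch now carries at least one NULL
        }
        outputBatch.size++;
      }

      public static void main(String[] args) {
        Batch outputBatch = new Batch(2);    // two GROUP BY key columns
        appendNullKeyRow(outputBatch);
        System.out.println(outputBatch.size + " " + outputBatch.cols[0].noNulls); // 1 false
      }
    }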
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashMultiKeyCommonLines
+#INCLUDE GroupByHashDuplicateReductionTableLines
+
+#INCLUDE GroupByHashMultiKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical"
+
+#INCLUDE GroupByHashMultiKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical"
+
+  @Override
+  protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    if (batch.selectedInUse) {
+      handleLogical(batch, inputLogicalSize);
+    } else {
+      handlePhysical(batch, inputLogicalSize);
+    }
+  }
+
+  /**
+   * Flush all of the keys of the multi-key hash table to the output.
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+
+      // NULL entry to deal with.
+
+      // Is the outputBatch already full?
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      final int keySize = groupByKeyExpressions.length;
+      final int nullBatchIndex = outputBatch.size;
+      for (int i = 0; i < keySize; i++) {
+        ColumnVector keyColumnVector = outputBatch.cols[i];
+        keyColumnVector.isNull[nullBatchIndex] = true;
+        keyColumnVector.noNulls = false;
+      }
+
+      outputBatch.size++;
+    }
+
+    doOutputMultiKeys();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt
new file mode 100644
index 0000000..a9df6d1
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt
@@ -0,0 +1,42 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT   1) Reduce code duplication
+#COMMENT   2) To not incur the cost of calling methods or having abstract objects
+#COMMENT   3) And, to not have to attempt to parameterize methods that involve simple locals
+#COMMENT   4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to Single Key variations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT   Any single key variation specific ColumnVector import code lines.
+#COMMENT
+#BEGIN_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+#IF STRING_KEY
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+#ENDIF STRING_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT   Any single key variation specific initializeOp code lines.
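The single-key includes that follow branch repeatedly on the same three ColumnVector states: noNulls (the isNull array can be ignored entirely), isRepeating (row 0 stands for the whole batch), and the general row-by-row case. A small self-contained sketch of why the three counting strategies differ; this toy Column type only models the flags, it is not Hive's ColumnVector.

    // Standalone sketch of the noNulls / isRepeating / isNull dispatch.
    public class ColumnFlagsSketch {

      static class Column {
        boolean noNulls;
        boolean isRepeating;
        boolean[] isNull;
      }

      static int countNonNulls(Column col, int size) {
        if (col.noNulls) {
          // {REPEATING|NO REPEATING} NO NULLS: every row counts, values never consulted.
          return size;
        } else if (col.isRepeating) {
          // REPEATING, NULLS possible: row 0 decides for the whole batch.
          return col.isNull[0] ? 0 : size;
        } else {
          // NOT REPEATING, NULLS possible: inspect isNull row by row.
          int count = 0;
          for (int i = 0; i < size; i++) {
            if (!col.isNull[i]) {
              count++;
            }
          }
          return count;
        }
      }

      public static void main(String[] args) {
        Column col = new Column();
        col.isNull = new boolean[] {false, true, false, true};
        System.out.println(countNonNulls(col, 4));  // 2
      }
    }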
+#COMMENT +#BEGIN_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP +#IF SINGLE_KEY + + keyVectorSerializeWrite = + new VectorSerializeRow( + new BinarySortableSerializeWrite(1)); + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + int[] columnMap = new int[] { groupByKeyExpressions[0].getOutputColumnNum() }; + keyVectorSerializeWrite.init(typeInfos, columnMap); + + currentKeyOutput = new Output(); + nextKeyOutput = new Output(); +#ENDIF SINGLE_KEY +#END_LINES diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnInclude.txt new file mode 100644 index 0000000..90a462b --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnInclude.txt @@ -0,0 +1,527 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeyCountColumnOperator. +#COMMENT +#COMMENT + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleNoNullsKey. + * + * Look for sequences of equal keys and determine their count. + */ + private void doNoNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + + // Start counting after first no NULL key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + + // Handle last key. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNoNullsKey. + * + * Scan for sequences of equal keys. The column count is simply 0 because of all NULL values -- + * but we still must create an entry in the slot table. + */ + private void doNoNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. 
+ +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + // No counting. + } else { + + // Current key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + + // Handle last key. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS + } + + /* + * Do the NO REPEATING NULLS case for handleNoNullsKey. + * + * Look for sequence of equal keys -- look over at the non-key-column and count non-null rows. + * Even when the non-NULL row count is 0, we still must create an entry in the slot table. + */ + private void doNoNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + int count = (nonKeyIsNull[firstBatchIndex] ? 0 : 1); + + // Start counting after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + int count = (nonKeyIsNull[0] ? 0 : 1); + + // Start counting after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + + // Handle last key. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + + /* + * <OrPhysical> batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * In general, loop over key column and process the keys. Look for sequences of equal keys. And, + * at the same time do any processing for the non-key-column counting. + * + * Here are the cases: + * + * 1) When non-key-column {REPEATING|NO REPEATING} NO NULLS, look for sequences of equal keys + * and determine their count. + * + * 2) When non-key-column REPEATING NULLS, scan for sequences of equal keys. The column count + * is simply 0 because of all NULL values -- but we still must create an entry in the + * slot table. + * + * 3) Otherwise, non-key-column NO REPEATING NULLS, as we are looking for sequence of + * equal keys -- look over at the non-key-column and count non-null rows. Even when the + * non-null row count is 0, we still must create an entry in the slot table. 
+ * + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // NOTE: We don't look at the non-key column values -- we just count. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doNoNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // REPEATING NO NULLS + doNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS. + + doNoNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector); + + } + } + + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyNoNullsColumn). + */ + private void doNullsKeyNoNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + + // Start counting after first no NULL key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyRepeatingNullColumn). + */ + private void doNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. 
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+    // Start counting after first key.
+#IF LOGICAL_BATCH_PROCESSING
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+      if (keyColIsNull[batchIndex]) {
+
+        if (currKeyIsNull) {
+
+          // No counting.
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS
+
+          // New NULL key.
+          currKeyIsNull = true;
+        }
+
+      } else {
+
+#USE_LINES COMMON_GET_NEXT_KEY
+        if (currKeyIsNull) {
+
+          // Current NULL key ended.
+          currKeyIsNull = false;
+
+          // Remember we had at least one NULL key.
+          haveNullKey = true;
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT
+
+          // No counting
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+        }
+      }
+    }
+
+    // Handle last key.
+    if (currKeyIsNull) {
+      haveNullKey = true;
+    } else {
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY_ALL_NULLS
+    }
+  }
+
+  /*
+   * Do the non-key-column NO REPEATING NULLS case for handleNullsKey.
+   *
+   * (For remaining comments see doNoNullsKeyNullsColumn).
+   */
+  private void doNullsKeyNullsColumn(VectorizedRowBatch batch,
+      final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector)
+      throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+    boolean[] nonKeyIsNull = nonKeyColVector.isNull;
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#IF LOGICAL_BATCH_PROCESSING
+    int count = (nonKeyIsNull[firstBatchIndex] ? 0 : 1);
+
+    // Start counting after first key.
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    int count = (nonKeyIsNull[0] ? 0 : 1);
+
+    // Start counting after first key.
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+      if (keyColIsNull[batchIndex]) {
+
+        if (currKeyIsNull) {
+
+          count += (nonKeyIsNull[batchIndex] ? 0 : 1);
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY
+
+          // New NULL key.
+          currKeyIsNull = true;
+          count = (nonKeyIsNull[batchIndex] ? 0 : 1);
+        }
+
+      } else {
+
+#USE_LINES COMMON_GET_NEXT_KEY
+        if (currKeyIsNull) {
+
+          // Current NULL key ended.
+          currKeyIsNull = false;
+
+          haveNullKey = true;
+          nullKeyCount += count;
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+
+          count = (nonKeyIsNull[batchIndex] ? 0 : 1);
+#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT
+
+          count += (nonKeyIsNull[batchIndex] ? 0 : 1);
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY
+
+          // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY
+
+          count = (nonKeyIsNull[batchIndex] ? 0 : 1);
+        }
+      }
+    }
+
+    // Handle last key.
+ if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_COLUMN_FIND_OR_CREATE_KEY + } + } + + /* + * batch processing for NULLS key case. + * + * Both NULL and non-NULL keys will have counts for non-key-columns. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any processing for the non-key-column counting. + * + * (See the non-key column case comments for handleNoNullsKey). + * + * In all cases above, when its a NULL key, do NULL entry processing. + * + */ + private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // NOTE: We don't look at the non-key column values -- we just count. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // Non-NULL repeating non-key column. + doNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + + doNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector); + + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnOperator.txt new file mode 100644 index 0000000..0528ea6 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountColumnOperator.txt @@ -0,0 +1,308 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.count.VectorGroupByHashKeyCountTable;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hive.common.util.HashCodeUtil;
+
+#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS
+#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS
+
+/*
+ * Specialized class for doing a single key COUNT(non-key-column) Native Vectorized GroupBy.
+ * That is, the grouping is being done on a single long key and
+ * the counting is for another ("non-key") column (which can be any data type).
+ *
+ * We make a single pass.  We loop over the key column and process the keys.  We look for
+ * sequences of NULL keys or equal keys.  And, at the same time, do any processing for the
+ * non-key-column counting.
+ *
+ * NOTE: Both NULL and non-NULL keys have counts for non-key-columns.  So, after counting the
+ * non-NULL fields for the non-key-column, we always do a hash table find/create even when the
+ * count is 0, since all of those keys must be part of the output result.
+
+    // A key will get created even when there are no non-NULL column values.  Count includes 0.
+
+    findOrCreateLongKeyZeroCount(
+        key,
+        longKeySeries.currentHashCode,
+        nonNullCount);
+
+ */
+public class 
+    extends VectorGroupByHashKeyCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  // Non-transient members initialized by the constructor.  They cannot be final due to Kryo.
+
+  protected int countColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient boolean haveNullKey;
+
+  protected transient long nullKeyCount;
+
+#USE_LINES COMMON_KEY_VARIATION_TRANSIENT
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public () {
+    super();
+  }
+
+  public (CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    countColumnNum = countAggregate.getCountColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+    nullKeyCount = 0;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT    1) Reduce code duplication
+#COMMENT    2) Avoid the cost of calling methods or creating abstract objects
+#COMMENT    3) Avoid having to parameterize methods that involve simple locals
+#COMMENT    4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashSingleKeyCommonLines
+#INCLUDE GroupByHashCountColumnTableLines
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+  /*
+   * Repeating key case -- it is either ALL NULL keys or ALL same non-NULL keys.
+   *
+   * First, we determine the number of non-NULL values in the non-key column.
+   * Then, whether ALL NULL keys or ALL same non-NULL keys, we create the key if necessary and
+   * include the new count.
+   *
+   * A NULL key is not in the slot table.  It is separately represented by members haveNullKey
+   * and nullKeyCount.
+   *
+   */
+  private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize,
+      keyColVector)
+      throws HiveException, IOException {
+
+    /*
+     * First, determine the count of the non-key column for the whole batch, which is covered by
+     * the repeating key.
+     */
+    ColumnVector nonKeyColVector = batch.cols[countColumnNum];
+    int nonKeyNonNullCount;
+    if (nonKeyColVector.noNulls) {
+
+      // NOTE: This may or may not have nonKeyColVector.isRepeating == true.
+      // Non-Key: [REPEATING,] NO NULLS
+      nonKeyNonNullCount = inputLogicalSize;
+
+    } else if (nonKeyColVector.isRepeating) {
+
+      // Non-Key: REPEATING, NULLS Possible
+      nonKeyNonNullCount = (nonKeyColVector.isNull[0] ? 0 : inputLogicalSize);
+
+    } else {
+
+      // Non-Key: NOT REPEATING, NULLS Possible.
+      boolean[] nonKeyIsNull = nonKeyColVector.isNull;
+      nonKeyNonNullCount = 0;
+      if (batch.selectedInUse) {
+
+        int[] selected = batch.selected;
+
+        // Count every row, including the first -- the repeating key covers the whole batch.
+        for (int logicalIndex = 0; logicalIndex < inputLogicalSize; logicalIndex++) {
+          final int batchIndex = selected[logicalIndex];
+          if (!nonKeyIsNull[batchIndex]) {
+            nonKeyNonNullCount++;
+          }
+        }
+      } else {
+        // Count every row, including the first -- the repeating key covers the whole batch.
+        for (int batchIndex = 0; batchIndex < inputLogicalSize; batchIndex++) {
+          if (!nonKeyIsNull[batchIndex]) {
+            nonKeyNonNullCount++;
+          }
+        }
+      }
+    }
+
+    /*
+     * Finally, use the non-key non-NULL count for our repeated non-NULL or NULL keys.
+     */
+    if (keyColVector.noNulls || !keyColVector.isNull[0]) {
+
+      // Non-NULL key.
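+      // NOTE: Exactly one of the following #IF branches survives template
+      // expansion, depending on the key variation being generated
+      // (long key, string key, or serialized single key).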
+#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongKeyZeroCount( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + nonKeyNonNullCount); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKeyCount( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + nonKeyNonNullCount); +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKeyCount( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + nonKeyNonNullCount); +#ENDIF SINGLE_KEY + } else { + + // All NULL keys. Since we are counting a non-Key column, we must count it under the NULL + // pseudo-entry. + haveNullKey = true; + nullKeyCount += nonKeyNonNullCount; + + } + } + +#INCLUDE GroupByHashSingleKeyCountColumnInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyCountColumnInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. 
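+
+    // (Output batch layout: column 0 carries the group key and column 1 the
+    // LongColumnVector of counts; the NULL pseudo-entry, when present, is
+    // emitted before the hash table entries.)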
+ + keyColumnVector = () outputBatch.cols[0]; + + LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[1]; + + if (haveNullKey) { + outputCountForNullSingleKey( + keyColumnVector, countColumnVector, nullKeyCount); + } + +#IF LONG_KEY + outputLongZeroCountKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF STRING_KEY +#IF SINGLE_KEY + doOutputSingleKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF SINGLE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyInclude.txt new file mode 100644 index 0000000..cad9bf9 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyInclude.txt @@ -0,0 +1,147 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeyCountKeyOperator. +#COMMENT +#COMMENT + /* + * batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * Do find/create on each key with count count. + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + + // Handle last key. +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY + } + + /* + * batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * For all NULL keys cases we note NULL key exists but leave its count as 0. + * + * Do find/create on each non-NULL key with count count. + */ + private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count; + if (currKeyIsNull) { + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. 
+ haveNullKey = true; + } else { + count = 1; + } + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // We don't count NULLs for NULL key. + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY +2 + + // New NULL key. + currKeyIsNull = true; + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. We don't count NULLs for NULL key. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY +2 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + + // Handle last key. + if (!currKeyIsNull) { +#USE_LINES COUNT_KEY_FIND_OR_CREATE_KEY + } + } diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyOperator.txt new file mode 100644 index 0000000..9c96ea1 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountKeyOperator.txt @@ -0,0 +1,241 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.count.VectorGroupByHashKeyCountTable; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a single key COUNT(key-column) Native Vectorized GroupBy. That is, + * the grouping is being done on one long key and we are counting it. + * + * The NULL key is not represented in the hash table. We handle them as a special case. So, + * the find/create call for non-NULL keys looks like this: + + findOrCreateLongKeyNonZeroCount( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); + + */ +public class + extends VectorGroupByHashKeyCountTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashCountKeyTableLines +#COMMENT +#COMMENT=========================================================================================== +#COMMENT + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For all NULL keys case we note NULL key exists but leave its count as 0. 
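+   *
+   * For example, a batch repeating key 5 over 100 rows yields one find/create
+   * with count 100, while a batch of repeating NULL keys only sets haveNullKey --
+   * COUNT(key-column) never counts NULL keys, so the NULL group's count stays 0.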
+ */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongKeyNonZeroCount( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKeyCount( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + inputLogicalSize); +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKeyCount( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + inputLogicalSize); +#ENDIF SINGLE_KEY + } else { + + // We note we encountered a repeating NULL key. But there will be no count for it -- + // just NULL. + haveNullKey = true; + } + } + +#INCLUDE GroupByHashSingleKeyCountKeyInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyCountKeyInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + keyColumnVector = () outputBatch.cols[0]; + + LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[1]; + + if (haveNullKey) { + + // COUNT(column) does not maintain a count for NULLs and since we are processing the key + // our count is always 0. 
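+      // (SQL semantics: SELECT k, COUNT(k) FROM t GROUP BY k reports 0 for
+      // the NULL key group, because COUNT(column) skips NULL values.)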
+ outputCountForNullSingleKey( + keyColumnVector, countColumnVector, /* nullKeyCount */ 0); + } + +#IF LONG_KEY + outputLongNonZeroKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF STRING_KEY +#IF SINGLE_KEY + doOutputSingleKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF SINGLE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarInclude.txt new file mode 100644 index 0000000..ee4d418 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarInclude.txt @@ -0,0 +1,142 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeyCountStarOperator. +#COMMENT +#COMMENT + /* + * batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * Do find/create on each key with count count. + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + + // Handle last key. +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY + } + + /* + * batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NULLS key case. + * + * For all NULL keys we note NULL key exists AND count it count. + * + * Do find/create on each non-NULL key with count count. + */ + private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY +2 + + // New NULL key. 
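+        // (Unlike COUNT(column), COUNT(*) counts rows whose key is NULL, so
+        // the new NULL-key run starts with this row already counted.)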
+ currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY +2 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + + // Handle last key. + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#USE_LINES COUNT_STAR_FIND_OR_CREATE_KEY + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarOperator.txt new file mode 100644 index 0000000..70d5b58 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCountStarOperator.txt @@ -0,0 +1,240 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.count.VectorGroupByHashKeyCountTable; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a single key COUNT(*) Native Vectorized GroupBy that is lookup on + * a single long using a specialized hash map. + * + Count Star + + NULL key has separate counter. + + findOrCreateLongKeyNonZeroCount( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); + + */ +public class + extends VectorGroupByHashKeyCountTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + protected transient long nullKeyCount; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + nullKeyCount = 0; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashCountStarTableLines +#COMMENT + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For all NULL keys case we note NULL key exists AND count it. + */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongKeyNonZeroCount( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKeyCount( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + inputLogicalSize); +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKeyCount( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + inputLogicalSize); +#ENDIF SINGLE_KEY + } else { + + // We note we encountered a repeating NULL key. + haveNullKey = true; + nullKeyCount += inputLogicalSize; + } + } + +#INCLUDE GroupByHashSingleKeyCountStarInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyCountStarInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. 
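+    // (isRepeating means entry 0 of the column vector describes every row of
+    // the batch, so the whole batch collapses into a single find/create with
+    // count == inputLogicalSize instead of a per-row loop.)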
+ if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + keyColumnVector = () outputBatch.cols[0]; + + LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[1]; + + if (haveNullKey) { + outputCountForNullSingleKey( + keyColumnVector, countColumnVector, nullKeyCount); + } + +#IF LONG_KEY + outputLongNonZeroKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF STRING_KEY +#IF SINGLE_KEY + doOutputSingleKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF SINGLE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDecimal64Operator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDecimal64Operator.txt new file mode 100644 index 0000000..7692f67 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDecimal64Operator.txt @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a DECIMAL_64 Native Vectorized GroupBy. 
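+ *
+ * DECIMAL_64 values (decimals of precision 18 or less) are stored as scaled
+ * longs in a Decimal64ColumnVector, so grouping can reuse the long-key hash
+ * table machinery unchanged -- hence this operator simply extends the long
+ * key variant.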
+ */ +public class + extends VectorGroupByHashLong { + + private static final long serialVersionUID = 1L; + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionInclude.txt new file mode 100644 index 0000000..f6c48f1 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionInclude.txt @@ -0,0 +1,132 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeyDuplicateReductionOperator. +#COMMENT +#COMMENT + /* + * batch processing for NO NULLS key case. + * + * Do find/create on each key. + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current key ended. +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + + // Handle last key. +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + } + + /* + * batch processing for NULLS key case. + * + * For all NULL keys cases we note NULL key exists since we don't represent it in the slot table. + * + * Do find/create on each non-NULL key. + */ + private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + if (currKeyIsNull) { + + // We note we encountered a NULL key. + haveNullKey = true; + } + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // NULL key series. + } else { + + // Current non-NULL key ended by NULL key. +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + + // New NULL key. + currKeyIsNull = true; + + // We note we encountered a NULL key. + haveNullKey = true; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. 
+ currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current non-NULL key ended by another non-NULL key. +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + } + + // Handle last key. + if (!currKeyIsNull) { +#USE_LINES DUPLICATE_REDUCTION_CREATE_OR_IGNORE_KEY + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt new file mode 100644 index 0000000..df6331d --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.duplicatereduction.VectorGroupByHashKeyDuplicateReductionTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a single key Native Vectorized GroupBy with no aggregation. + * + * It is used on a single key for duplicate reduction. + * + * Final duplicate elimination must be done in reduce-shuffle and a reducer since with hash table + * overflow some duplicates can slip through. And, of course, other vertices may contribute + * the same keys. + */ +public class + extends VectorGroupByHashKeyDuplicateReductionTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + +#USE_LINES COMMON_KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashDuplicateReductionTableLines + + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For the all NULL or all 0 keys case we note NULL/0 key exists. Otherwise, we do the + * find/create. + */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + if (repeatingKey == 0) { + + // We don't store 0 in the slot table so it can be used to indicate an empty slot. + haveZeroKey = true; + } else { + createOrIgnoreLongDuplicateReductionKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + createOrIgnoreBytesDuplicateReductionKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength)); +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + createOrIgnoreBytesDuplicateReductionKey( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength)); +#ENDIF SINGLE_KEY + } else { + + // We note we encountered a repeating NULL key. 
+ haveNullKey = true; + } + } + +#INCLUDE GroupByHashSingleKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + keyColumnVector = () outputBatch.cols[0]; + + if (haveNullKey) { + + // NULL entry to deal with. + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + outputBatch.size++; + } + +#IF LONG_KEY + doOutputLongKeys(keyColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeys(keyColumnVector); +#ENDIF STRING_KEY +#IF SINGLE_KEY + doOutputSerializeKeys(keyColumnVector); +#ENDIF SINGLE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordColumnInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordColumnInclude.txt new file mode 100644 index 0000000..ebca6ab --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordColumnInclude.txt @@ -0,0 +1,806 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeyWordColumnOperator. +#COMMENT +#COMMENT + /* + * Do the aggregation column NO NULLS case for handleNoNullsKey. + * + * Look for sequences of equal keys and determine their aggregation. 
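+   *
+   * A minimal sketch of the loop shape (assuming a plain long key, a long
+   * aggregate column, and SUM-style aggregation; findOrCreateAndAggregate
+   * stands in for the #USE_LINES hash table snippets):
+   *
+   *   long currentKey = keyVector[0];
+   *   long aggregate = valueVector[0];
+   *   for (int i = 1; i < size; i++) {
+   *     if (keyVector[i] == currentKey) {
+   *       aggregate += valueVector[i];                     // Run continues.
+   *     } else {
+   *       findOrCreateAndAggregate(currentKey, aggregate); // Run ended.
+   *       currentKey = keyVector[i];
+   *       aggregate = valueVector[i];
+   *     }
+   *   }
+   *   findOrCreateAndAggregate(currentKey, aggregate);     // Final run.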
+ */ + private void doNoNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; + +#IF LOGICAL_BATCH_PROCESSING + aggregate = vector[selected[0]]; + + // Start counting after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + aggregate = vector[0]; + + // Start counting after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + final value = vector[batchIndex]; + + // Next key. + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + +#USE_LINES WORD_AGGREGATE_COLUMN_VALUE +4 + } else { + + // Current key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +4 + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + // Initialize new key's aggregation. + aggregate = value; + } + } + + // Handle last key. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION + } + + /* + * Do the non-key-column REPEATING NO NULLS case for handleNoNullsKey. + * + * Scan for sequences of equal keys. The column count is simply 0 because of all NULL values -- + * but we still must create an entry in the slot table. + */ + private void doNoNullsKeyRepeatingNoNullColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since there is not aggregation because + // repeating non-key NULL. + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + final repeatedValue = aggregateColVector.vector[0]; + aggregate = 0; + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. + +#USE_LINES WORD_REPEATED_AGGREGATE_COLUMN_VALUE +4 + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6 + + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + + // Handle last key. + +#USE_LINES WORD_REPEATED_AGGREGATE_COLUMN_VALUE + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNoNullsKey. 
+ * + * Scan for sequences of equal keys. The column count is simply 0 because of all NULL values -- + * but we still must create an entry in the slot table. + */ + private void doNoNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since there is not aggregation because + // repeating non-key NULL. + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + // No aggregating -- value is NULL. + } else { + + // Current key ended. + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +4 + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + + // Handle last key. + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY + } + + /* + * Do the aggregation NULLS case for handleNoNullsKey. + * + * Look for sequence of equal keys -- look over at the non-key-column and count non-null rows. + * Even when the non-NULL row count is 0, we still must create an entry in the slot table. + */ + private void doNoNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + boolean[] aggrColIsNull = aggregateColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; + +#IF LOGICAL_BATCH_PROCESSING + boolean isAggregateNull = aggrColIsNull[firstBatchIndex]; + aggregate = vector[firstBatchIndex]; // Undefined when isAggregateNull true. + + // Start counting after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + boolean isAggregateNull = aggrColIsNull[0]; + aggregate = vector[0]; // Undefined when isAggregateNull true. + + // Start aggregating after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + // Next key. + +#USE_LINES COMMON_GET_NEXT_KEY +#USE_LINES COMMON_IF_NEXT_EQUALS_CURRENT + + if (!aggrColIsNull[batchIndex]) { + + final value = vector[batchIndex]; +#USE_LINES WORD_AGGREGATE_NULLS_COLUMN_VALUE +2 + } + } else { + + // Current key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +4 + + // New current key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + // Initialize new key's aggregation. 
+ isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + + } + } + + // Handle last key. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION + } + + /* + * <OrPhysical> batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * In general, loop over key column and process the keys. Look for sequences of equal keys. And, + * at the same time do any processing for the non-key-column counting. + * + * Here are the cases: + * + * 1) When non-key-column {REPEATING|NO REPEATING} NO NULLS, look for sequences of equal keys + * and determine their count. + * + * 2) When non-key-column REPEATING NULLS, scan for sequences of equal keys. The column count + * is simply 0 because of all NULL values -- but we still must create an entry in the + * slot table. + * + * 3) Otherwise, non-key-column NO REPEATING NULLS, as we are looking for sequence of + * equal keys -- look over at the non-key-column and count non-null rows. Even when the + * non-null row count is 0, we still must create an entry in the slot table. + * + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + aggregateColVector = + () batch.cols[wordAggregateColumnNum]; + + if (aggregateColVector.isRepeating) { + + // Aggregation: REPEATING, NULLS Possible. + + if (aggregateColVector.noNulls || !aggregateColVector.isNull[0]) { + + doNoNullsKeyRepeatingNoNullColumn( + batch, inputLogicalSize, keyColVector, aggregateColVector); + } else { + + doNoNullsKeyRepeatingNullColumn( + batch, inputLogicalSize, keyColVector); + + } + } else if (aggregateColVector.noNulls) { + + // Aggregation: NO REPEATING, NO NULLS + + doNoNullsKeyNoNullsColumn( + batch, inputLogicalSize, keyColVector, aggregateColVector); + + } else { + + // Aggregation: NOT REPEATING, NULLS. + + doNoNullsKeyNullsColumn( + batch, inputLogicalSize, keyColVector, aggregateColVector); + + } + } + + /* + * Do the non-key-column NO NULLS case for handleNullsKey. + * + * (For remaining comments see doNullsKeyNoNullsColumn). + */ + private void doNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; + +#IF LOGICAL_BATCH_PROCESSING + aggregate = vector[selected[0]]; + + // Start aggregating after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + aggregate = vector[0]; + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + // Next key is NULL. + + if (currKeyIsNull) { + + // Current NULL key series continues. + + final value = vector[batchIndex]; + +#USE_LINES WORD_AGGREGATE_COLUMN_VALUE +6 + } else { + + // Current non-NULL key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. 
+#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6
+
+ // New NULL key.
+ currKeyIsNull = true;
+
+ // Initialize new NULL key's aggregation.
+ aggregate = vector[batchIndex];
+ }
+
+ } else {
+
+ // Non-NULL next key.
+
+#USE_LINES COMMON_GET_NEXT_KEY +2
+ if (currKeyIsNull) {
+
+ // Current NULL key ended.
+ currKeyIsNull = false;
+
+ // Remember globally we have a NULL key and do appropriate aggregation.
+#USE_LINES NULL_KEY_ENDED_NO_NULLS_AGGREGATION +6
+
+ // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY +2
+
+ // Initialize new non-NULL key's aggregation.
+ aggregate = vector[batchIndex];
+#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT
+
+ final value = vector[batchIndex];
+
+#USE_LINES WORD_AGGREGATE_COLUMN_VALUE +6
+ } else {
+
+ // Key mismatch. Current non-NULL key ended.
+
+#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +4
+
+ // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY +2
+
+ // Initialize new non-NULL key's aggregation.
+ aggregate = vector[batchIndex];
+ }
+ }
+ }
+
+ // Handle last key.
+ if (currKeyIsNull) {
+
+ // Remember globally we have a NULL key and do appropriate aggregation.
+#USE_LINES NULL_KEY_ENDED_NO_NULLS_AGGREGATION +2
+ } else {
+
+ // Do appropriate {create init / find and aggregate} hash map entry.
+#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +2
+ }
+ }
+
+ /*
+ * Do the aggregation REPEATING NO NULLS case for handleNullsKey.
+ *
+ * (For remaining comments see doNoNullsKeyRepeatingNoNullColumn).
+ */
+ private void doNullsKeyRepeatingNoNullColumn(VectorizedRowBatch batch,
+ final int inputLogicalSize, keyColVector,
+ aggregateColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+ int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES COMMON_KEY_VECTOR_VARIABLES
+
+ // This loop looks for series of equal keys and counts each series so the repeating
+ // non-key value can be aggregated once per key series.
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ final repeatedValue = aggregateColVector.vector[0];
+ aggregate = 0;
+ int count = 1;
+
+ // Start after first key.
+#IF LOGICAL_BATCH_PROCESSING
+ for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+ final int batchIndex = selected[logicalIndex];
+#ELSE
+ for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+ if (keyColIsNull[batchIndex]) {
+
+ if (currKeyIsNull) {
+
+ count++;
+ } else {
+
+ // Current non-NULL key ended.
+
+#USE_LINES WORD_REPEATED_AGGREGATE_COLUMN_VALUE +6
+
+ // Do appropriate {create init / find and aggregate} hash map entry.
+#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6
+
+ // New NULL key.
+ currKeyIsNull = true;
+
+ count = 1;
+ }
+
+ } else {
+
+#USE_LINES COMMON_GET_NEXT_KEY +2
+ if (currKeyIsNull) {
+
+ // Current NULL key ended.
+ currKeyIsNull = false;
+
+#USE_LINES WORD_REPEATED_AGGREGATE_COLUMN_VALUE +6
+
+ // Remember globally we have a NULL key and do appropriate aggregation.
+#USE_LINES NULL_KEY_ENDED_NO_NULLS_AGGREGATION +6
+
+ // New non-NULL key.
+#USE_LINES COMMON_NEW_CURRENT_KEY +2
+
+ count = 1;
+#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT
+
+ count++;
+ } else {
+
+ // Key mismatch. Current non-NULL key ended.
+
+#USE_LINES WORD_REPEATED_AGGREGATE_COLUMN_VALUE +4
+
+ // Do appropriate {create init / find and aggregate} hash map entry.
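+ // ('aggregate' was just computed from the repeated value and the series count;
+ // e.g. SUM uses repeatedValue * count.)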
+#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + + count = 1; + } + } + } + + // Handle last key. + +#USE_LINES WORD_REPEATED_AGGREGATE_COLUMN_VALUE + + if (currKeyIsNull) { + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES NULL_KEY_ENDED_NO_NULLS_AGGREGATION + } else { + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION + } + } + + /* + * Do the aggregation REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyRepeatingNullColumn). + */ + private void doNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + // Start after first key. +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // Current NULL key series continues. + + // No aggregating the NULL value. + } else { + + // Current non-NULL key ended. + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +6 + + // New NULL key. + currKeyIsNull = true; + } + + } else { + +#USE_LINES COMMON_GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // Remember globally we have a NULL key with a NULL value. +#USE_LINES WORD_NULL_KEY_ENDED_ALL_NULLS +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + // Current non-NULL key series continues. + + // No aggregating of our NULL column. + } else { + + // Key mismatch. Current non-NULL key ended. + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY + } + } + } + + if (currKeyIsNull) { + + // Remember globally we have a NULL key with a NULL value. +#USE_LINES WORD_NULL_KEY_ENDED_ALL_NULLS +2 + } else { + + // Do appropriate {create init / find and ignore NULL} hash map entry. +#USE_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +2 + } + } + + /* + * Do the non-key-column NO REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyNullsColumn). 
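+ *
+ * For example (illustrative): with keys {1, 1, NULL} and values {5, NULL, 7}, key 1
+ * aggregates only the non-NULL value 5, and the NULL key pseudo-entry aggregates 7.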
+ */ + private void doNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, + aggregateColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES COMMON_KEY_VECTOR_VARIABLES + + boolean[] aggrColIsNull = aggregateColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES COMMON_LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES COMMON_PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + [] vector = aggregateColVector.vector; + +#IF LOGICAL_BATCH_PROCESSING + boolean isAggregateNull = aggrColIsNull[firstBatchIndex]; + aggregate = vector[firstBatchIndex]; // Undefined when isAggregateNull true. + + // Start aggregating after first key. + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + boolean isAggregateNull = aggrColIsNull[0]; + aggregate = vector[0]; // Undefined when isAggregateNull true. + + // Start counting after first key. + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyColIsNull[batchIndex]) { + + // Next key is NULL. + + if (currKeyIsNull) { + + // Current NULL key series continues. + + final value = vector[batchIndex]; +#USE_LINES WORD_AGGREGATE_NULLS_COLUMN_VALUE +6 + + } else { + + // Current non-NULL key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +6 + + // New NULL key. + currKeyIsNull = true; + + // Initialize new key's aggregation. + isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + } + + } else { + + // Non-NULL next key. + +#USE_LINES COMMON_GET_NEXT_KEY +2 + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NULLS_AGGREGATION +6 + + // New non-NULL current key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + // Initialize new key's aggregation. + isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + +#USE_LINES COMMON_ELSE_IF_NEXT_EQUALS_CURRENT +2 + + // Current non-NULL key series continues. + + final value = vector[batchIndex]; +#USE_LINES WORD_AGGREGATE_NULLS_COLUMN_VALUE +6 + + } else { + + // Key mismatch. Current non-NULL key ended. + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +6 + + // New non-NULL key. +#USE_LINES COMMON_NEW_CURRENT_KEY +2 + + // Initialize new key's aggregation. + isAggregateNull = aggrColIsNull[batchIndex]; + aggregate = vector[batchIndex]; // Undefined when isAggregateNull true. + } + } + } + + // Handle last key. + if (currKeyIsNull) { + + // Remember globally we have a NULL key and do appropriate aggregation. +#USE_LINES WORD_NULL_KEY_ENDED_NULLS_AGGREGATION +2 + } else { + + // Do appropriate {create init / find and aggregate} hash map entry. +#USE_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION +2 + } + } + + /* + * batch processing for NULLS key case. + * + * Both NULL and non-NULL keys will have aggregation work. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any aggregation work. 
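+ *
+ * Unlike the NO NULLS key case, a NULL key series is not entered into the hash table;
+ * it is accumulated in the dedicated NULL pseudo-entry members instead.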
+ *
+ * (See the non-key column case comments for handleNoNullsKey).
+ *
+ * In all cases above, when it is a NULL key, do NULL pseudo-entry processing.
+ *
+ */
+ private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize,
+ keyColVector) throws HiveException, IOException {
+
+ aggregateColVector =
+ () batch.cols[wordAggregateColumnNum];
+
+ if (aggregateColVector.isRepeating) {
+
+ // Aggregation: REPEATING, NULLS Possible.
+
+ if (aggregateColVector.noNulls || !aggregateColVector.isNull[0]) {
+
+ doNullsKeyRepeatingNoNullColumn(
+ batch, inputLogicalSize, keyColVector, aggregateColVector);
+ } else {
+
+ doNullsKeyRepeatingNullColumn(
+ batch, inputLogicalSize, keyColVector);
+ }
+ } else if (aggregateColVector.noNulls) {
+
+ // Aggregation: NO REPEATING, NO NULLS
+
+ doNullsKeyNoNullsColumn(
+ batch, inputLogicalSize, keyColVector, aggregateColVector);
+
+ } else {
+
+ // Aggregation: NO REPEATING, NULLS Possible.
+
+ doNullsKeyNullsColumn(
+ batch, inputLogicalSize, keyColVector, aggregateColVector);
+
+ }
+ }
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordColumnOperator.txt
new file mode 100644
index 0000000..6de4314
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyWordColumnOperator.txt
@@ -0,0 +1,416 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.word.VectorGroupByHashKeyWordTable;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hive.common.util.HashCodeUtil;
+
+#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS
+#USE_LINES COMMON_KEY_VARIATION_OPERATOR_IMPORTS
+
+/*
+ * Specialized class for doing a single key {MAX|MIN|SUM}(non-key-column) Native Vectorized GroupBy.
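+ *
+ * For example (illustrative), this template backs query shapes like
+ * SELECT key, MAX(value) FROM t GROUP BY key.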
+ *
+ * UNDONE
+ * That is, the grouping is being done on a single long key and
+ * the counting is for another ("non-key") column (which can be any data type).
+ *
+ * We make a single pass. We loop over key column and process the keys. We look for
+ * sequences of NULL keys or equal keys. And, at the same time do any processing for the
+ * non-key-column counting.
+ *
+ * NOTE: Both NULL and non-NULL keys have counts for non-key-columns. So, after counting the
+ * non-NULL fields for the non-key-column, we always do a hash table find/create even when the count
+ * is 0 since all those keys must be part of the output result.
+
+ // A key will get created even when there are no non-NULL column values. Count includes 0.
+
+ findOrCreateLongKeyZeroCount(
+ key,
+ longKeySeries.currentHashCode,
+ nonNullCount);
+
+ */
+public class
+ extends VectorGroupByHashKeyWordTable {
+
+ private static final long serialVersionUID = 1L;
+
+ // Non-transient members initialized by the constructor. They cannot be final due to Kryo.
+
+ protected int wordAggregateColumnNum;
+
+ // The above members are initialized by the constructor and must not be
+ // transient.
+ //---------------------------------------------------------------------------
+
+ protected transient boolean haveNullKey;
+
+ protected transient boolean isNullKeyAggregateNull;
+
+ protected transient nullKeyAggregate;
+
+#USE_LINES COMMON_KEY_VARIATION_TRANSIENT
+ //---------------------------------------------------------------------------
+ // Pass-thru constructors.
+ //
+
+ public () {
+ super();
+ }
+
+ public (CompilationOpContext ctx, OperatorDesc conf,
+ VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+ super(ctx, conf, vContext, vectorDesc);
+
+ wordAggregateColumnNum = wordAggregate.getWordAggregateColumnNum();
+ }
+
+ @Override
+ protected void initializeOp(Configuration hconf) throws HiveException {
+ super.initializeOp(hconf);
+#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP
+ }
+
+ @Override
+ public void allocateHashTable() throws HiveException {
+ super.allocateHashTable();
+
+ haveNullKey = false;
+ isNullKeyAggregateNull = true;
+ nullKeyAggregate = 0;
+ }
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashSingleKeyCommonLines
+#INCLUDE GroupByHashWordColumnCommonLines
+#INCLUDE GroupByHashWordColumnTableLines
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+ /*
+ * Repeating key case -- it is either ALL NULL keys or ALL same non-NULL keys.
+ */
+ private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize,
+ keyColVector)
+ throws HiveException, IOException {
+
+ /*
+ * First, determine the aggregation of the non-key column for the whole batch which is covered
+ * by the repeating key.
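+ * (For instance, with SUM a repeating key reduces the whole batch to a single
+ * hash table update.)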
+ */
+ ColumnVector aggregateColVector = batch.cols[wordAggregateColumnNum];
+ [] vector = (() aggregateColVector).vector;
+ boolean isAggregateNull = true;
+ aggregate = 0;
+ final int count = inputLogicalSize;
+ if (aggregateColVector.isRepeating) {
+
+ // Non-Key: REPEATING, NULLS Possible
+ if (!aggregateColVector.isNull[0]) {
+
+ isAggregateNull = false;
+ final repeatedValue = vector[0];
+
+#USE_LINES WORD_REPEATED_AGGREGATE_COLUMN_VALUE +4
+ }
+
+ } else if (aggregateColVector.noNulls) {
+
+ // Non-Key: NOT REPEATING, NO NULLS.
+ if (batch.selectedInUse) {
+
+ int[] selected = batch.selected;
+
+ isAggregateNull = false;
+ aggregate = vector[selected[0]];
+
+ for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+ final int batchIndex = selected[logicalIndex];
+ final value = vector[batchIndex];
+
+#USE_LINES WORD_AGGREGATE_COLUMN_VALUE +6
+ }
+ } else {
+
+ isAggregateNull = false;
+ aggregate = vector[0];
+
+ for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+ final value = vector[batchIndex];
+
+#USE_LINES WORD_AGGREGATE_COLUMN_VALUE +6
+ }
+ }
+ } else {
+
+ // Non-Key: NOT REPEATING, NULLS Possible.
+
+ boolean[] nonKeyIsNull = aggregateColVector.isNull;
+ if (batch.selectedInUse) {
+
+ int[] selected = batch.selected;
+
+ // Scan for first non-NULL column value...
+ int i = 0;
+ int batchIndex;
+ while (true) {
+ batchIndex = selected[i];
+ if (!nonKeyIsNull[batchIndex]) {
+ break;
+ }
+ if (++i >= inputLogicalSize) {
+ break;
+ }
+ }
+ if (i < inputLogicalSize) {
+ isAggregateNull = false;
+ aggregate = vector[batchIndex];
+ i++;
+ for (; i < inputLogicalSize; i++) {
+ batchIndex = selected[i];
+ if (!nonKeyIsNull[batchIndex]) {
+ final value = vector[batchIndex];
+
+#USE_LINES WORD_AGGREGATE_COLUMN_VALUE +10
+ }
+ }
+ }
+ } else {
+
+ // Scan for first non-NULL column value...
+ int batchIndex = 0;
+ while (true) {
+ if (!nonKeyIsNull[batchIndex]) {
+ break;
+ }
+ if (++batchIndex >= inputLogicalSize) {
+ break;
+ }
+ }
+ if (batchIndex < inputLogicalSize) {
+ isAggregateNull = false;
+ aggregate = vector[batchIndex++];
+ for (; batchIndex < inputLogicalSize; batchIndex++) {
+ if (!nonKeyIsNull[batchIndex]) {
+ final value = vector[batchIndex];
+
+#USE_LINES WORD_AGGREGATE_COLUMN_VALUE +10
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Finally, use the non-key non-NULL aggregation for our repeated non-NULL or NULL keys.
+ */
+ if (keyColVector.noNulls || !keyColVector.isNull[0]) {
+
+ // Non-NULL key.
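+ // The single batch-wide aggregate computed above is applied to exactly one
+ // hash table entry for the repeated key.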
+#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + final int hashCode = HashCodeUtil.calculateLongHashCode(repeatingKey); + if (isAggregateNull) { + createOrIgnoreLongKeyNullEntry( + repeatingKey, + hashCode); + } else { + findOrCreateLongKeyWord( + repeatingKey, + hashCode, + aggregate); + if (currentIsAggregationNeeded) { + +#USE_LINES WORD_FIND_LONG_KEY_AGGREGATE_CURRENT + } + } +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + final int hashCode = + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength); + if (isAggregateNull) { + createOrIgnoreBytesKeyNullEntry( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + hashCode); + } else { + findOrCreateBytesKeyWord( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + hashCode, + aggregate); + if (currentIsAggregationNeeded) { + +#USE_LINES WORD_FIND_BYTES_KEY_AGGREGATE_CURRENT + } + } +#ENDIF STRING_KEY +#IF SINGLE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + final int hashCode = + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength); + if (isAggregateNull) { + createOrIgnoreBytesKeyNullEntry( + repeatingKey, 0, repeatingKeyLength, + hashCode); + } else { + findOrCreateBytesKeyWord( + repeatingKey, 0, repeatingKeyLength, + hashCode, + aggregate); + if (currentIsAggregationNeeded) { + +#USE_LINES WORD_FIND_BYTES_KEY_AGGREGATE_CURRENT + } + } +#ENDIF SINGLE_KEY + } else { + + // All NULL keys. Since we are aggregating a non-Key column, we must aggregate it under the + // NULL pseudo-entry. + haveNullKey = true; + if (!isAggregateNull) { + + if (isNullKeyAggregateNull) { + isNullKeyAggregateNull = false; + nullKeyAggregate = aggregate; + } else { + +#USE_LINES WORD_NULL_KEY_AGGREGATION +6 + } + } + + } + } + +#INCLUDE GroupByHashSingleKeyWordColumnInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyWordColumnInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and aggregate pairs of the one long key hash table to the + * output. 
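+ *
+ * The NULL key pseudo-entry, if present, is emitted first, followed by the regular
+ * hash table entries.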
+ */
+ @Override
+ protected void outputGroupBy() throws HiveException {
+
+ // Keys come first in the output.
+
+ keyColumnVector = () outputBatch.cols[0];
+
+ aggregateColumnVector = () outputBatch.cols[1];
+
+ if (haveNullKey) {
+ outputAggregateForNullSingleKey(
+ keyColumnVector, aggregateColumnVector, isNullKeyAggregateNull, nullKeyAggregate);
+ }
+
+#IF LONG_KEY
+ outputLongKeyAndAggregatePairs(
+ keyColumnVector, aggregateColumnVector);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+ doOutputStringKeyAndAggregatePairs(
+ keyColumnVector, aggregateColumnVector);
+#ENDIF STRING_KEY
+#IF SINGLE_KEY
+ doOutputSingleKeyAndAggregatePairs(
+ keyColumnVector, aggregateColumnVector);
+#ENDIF SINGLE_KEY
+ }
+}
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordColumnCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordColumnCommonLines.txt
new file mode 100644
index 0000000..1d23a3a
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordColumnCommonLines.txt
@@ -0,0 +1,165 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to non-COUNT non-key-column aggregations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- create the hash table entry if necessary;
+#COMMENT . All variations.
+#COMMENT
+#BEGIN_LINES WORD_FIND_LONG_KEY_AGGREGATE_CURRENT
+#IF MIN
+ // MIN aggregation.
+ if (aggregate < currentAggregationWord) {
+ replaceLongKeyWord(aggregate);
+ }
+#ENDIF MIN
+#IF MAX
+ // MAX aggregation.
+ if (aggregate > currentAggregationWord) {
+ replaceLongKeyWord(aggregate);
+ }
+#ENDIF MAX
+#IF SUM
+ // SUM aggregation.
+ replaceLongKeyWord(currentAggregationWord + aggregate);
+#ENDIF SUM
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- create the hash table entry if necessary;
+#COMMENT . All variations.
+#COMMENT
+#BEGIN_LINES WORD_FIND_BYTES_KEY_AGGREGATE_CURRENT
+#IF MIN
+ // MIN aggregation.
+ if (aggregate < currentAggregationWord) {
+ replaceBytesKeyWord(aggregate);
+ }
+#ENDIF MIN
+#IF MAX
+ // MAX aggregation.
+ if (aggregate > currentAggregationWord) {
+ replaceBytesKeyWord(aggregate);
+ }
+#ENDIF MAX
+#IF SUM
+ // SUM aggregation.
+ replaceBytesKeyWord(currentAggregationWord + aggregate);
+#ENDIF SUM
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- create the hash table entry if necessary;
+#COMMENT . All variations.
+#COMMENT
+#BEGIN_LINES WORD_REPEATED_AGGREGATE_COLUMN_VALUE
+#IF MIN
+ // MIN repeated aggregation is just the value.
+ aggregate = repeatedValue;
+#ENDIF MIN
+#IF MAX
+ // MAX repeated aggregation is just the value.
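+ // (count does not matter for MIN/MAX: n copies of a value have that value as
+ // both extremes.)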
+ aggregate = repeatedValue;
+#ENDIF MAX
+#IF SUM
+ // SUM repeated aggregation calculation.
+ aggregate = repeatedValue * count;
+#ENDIF SUM
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT . All variations.
+#COMMENT
+#BEGIN_LINES WORD_AGGREGATE_COLUMN_VALUE
+#IF MIN
+ // MIN aggregation.
+ if (value < aggregate) {
+ aggregate = value;
+ }
+#ENDIF MIN
+#IF MAX
+ // MAX aggregation.
+ if (value > aggregate) {
+ aggregate = value;
+ }
+#ENDIF MAX
+#IF SUM
+ // SUM aggregation.
+ aggregate += value;
+#ENDIF SUM
+#END_LINES
+#COMMENT
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT . All variations.
+#COMMENT
+#BEGIN_LINES WORD_AGGREGATE_NULLS_COLUMN_VALUE
+ if (isAggregateNull) {
+ isAggregateNull = false;
+ aggregate = value;
+ } else {
+
+#IF MIN
+ // MIN aggregation.
+ if (value < aggregate) {
+ aggregate = value;
+ }
+#ENDIF MIN
+#IF MAX
+ // MAX aggregation.
+ if (value > aggregate) {
+ aggregate = value;
+ }
+#ENDIF MAX
+#IF SUM
+ // SUM aggregation.
+ aggregate += value;
+#ENDIF SUM
+ }
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current NULL key ended --
+#COMMENT
+#BEGIN_LINES WORD_NULL_KEY_ENDED_ALL_NULLS
+ if (!haveNullKey) {
+
+ // We now have a NULL key for NULL value.
+ haveNullKey = true;
+ isNullKeyAggregateNull = true;
+ }
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- create the hash table entry if necessary;
+#COMMENT . All variations.
+#COMMENT
+#BEGIN_LINES WORD_NULL_KEY_AGGREGATION
+#IF MIN
+ // MIN aggregation against NULL key aggregate.
+ if (aggregate < nullKeyAggregate) {
+ nullKeyAggregate = aggregate;
+ }
+#ENDIF MIN
+#IF MAX
+ // MAX aggregation against NULL key aggregate.
+ if (aggregate > nullKeyAggregate) {
+ nullKeyAggregate = aggregate;
+ }
+#ENDIF MAX
+#IF SUM
+ // SUM aggregation against NULL key aggregate.
+ nullKeyAggregate += aggregate;
+#ENDIF SUM
+#END_LINES
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordColumnTableLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordColumnTableLines.txt
new file mode 100644
index 0000000..882ac40
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashWordColumnTableLines.txt
@@ -0,0 +1,278 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to non-COUNT non-key-column aggregations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current NULL key ended --
+#COMMENT
+#BEGIN_LINES WORD_NULL_KEY_ENDED_NULLS_AGGREGATION
+ if (!haveNullKey) {
+
+ // We now have a NULL key.
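+ // First NULL key series seen -- capture its (possibly NULL) aggregate as-is.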
+ haveNullKey = true; + isNullKeyAggregateNull = isAggregateNull; + nullKeyAggregate = aggregate; // Undefined when isAggregateNull true. + } else if (!isAggregateNull) { + + // We have something to work on. + + if (isNullKeyAggregateNull) { + + // First non-NULL aggregate for NULL key. + isNullKeyAggregateNull = false; + nullKeyAggregate = aggregate; + } else { + +#IF MIN + // MIN aggregation for current NULL key aggregate. + if (aggregate < nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF MIN +#IF MAX + // MAX aggregation for current NULL key aggregate. + if (aggregate > nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF MAX +#IF SUM + // SUM aggregation for current NULL key aggregate. + nullKeyAggregate += aggregate; +#ENDIF SUM + } + } +#END_LINES +#COMMENT +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current NULL key ended -- +#COMMENT +#BEGIN_LINES NULL_KEY_ENDED_NO_NULLS_AGGREGATION + if (!haveNullKey || isNullKeyAggregateNull) { + + // Initialize. + haveNullKey = true; + isNullKeyAggregateNull = false; + nullKeyAggregate = aggregate; + } else { + +#IF MIN + // MIN aggregation for current NULL key aggregate. + if (aggregate < nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF MIN +#IF MAX + // MAX aggregation for current NULL key aggregate. + if (aggregate > nullKeyAggregate) { + nullKeyAggregate = aggregate; + } +#ENDIF MAX +#IF SUM + // SUM aggregation for current NULL key aggregate. + nullKeyAggregate += aggregate; +#ENDIF SUM + } +#END_LINES +#COMMENT +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. +#COMMENT +#BEGIN_LINES CURRENT_KEY_ENDED_ALL_NULLS_AGGREGATE_COLUMN +// ***UNDONE***: CURRENT_KEY_ENDED_ALL_NULLS_AGGREGATE_COLUMN +#IF LONG_KEY +#ENDIF LONG_KEY +#IF STRING_KEY +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. +#COMMENT +#BEGIN_LINES WORD_FIND_OR_CREATE_KEY_NO_NULLS_AGGREGATION +#IF LONG_KEY + findOrCreateLongKeyWord( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + aggregate); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKeyWord( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + aggregate); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + findOrCreateBytesKeyWord( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + aggregate); +#ENDIF SINGLE_KEY||MULTI_KEY + if (currentIsAggregationNeeded) { + +#IF LONG_KEY +#IF MIN + // MIN aggregation against LONG key hash table entry. + if (aggregate < currentAggregationWord) { + replaceLongKeyWord(aggregate); + } +#ENDIF MIN +#IF MAX + // MAX aggregation against LONG key hash table entry. + if (aggregate > currentAggregationWord) { + replaceLongKeyWord(aggregate); + } +#ENDIF MAX +#IF SUM + // SUM aggregation against LONG key hash table entry. 
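+ // The stored aggregation word is combined with the new series aggregate in place.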
+ replaceLongKeyWord(currentAggregationWord + aggregate); +#ENDIF SUM +#ENDIF LONG_KEY +#IF STRING_KEY||SINGLE_KEY||MULTI_KEY +#IF MIN + // MIN aggregation against BYTES key hash table entry. + if (aggregate < currentAggregationWord) { + replaceBytesKeyWord(aggregate); + } +#ENDIF MIN +#IF MAX + // MAX aggregation against BYTES key hash table entry. + if (aggregate > currentAggregationWord) { + replaceBytesKeyWord(aggregate); + } +#ENDIF MAX +#IF SUM + // SUM aggregation against BYTES key hash table entry. + replaceBytesKeyWord(currentAggregationWord + aggregate); +#ENDIF SUM +#ENDIF STRING_KEY||SINGLE_KEY||MULTI_KEY + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. +#COMMENT +#BEGIN_LINES WORD_FIND_OR_CREATE_KEY_NULLS_AGGREGATION + if (isAggregateNull) { +#IF LONG_KEY + createOrIgnoreLongKeyNullEntry( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); +#ENDIF LONG_KEY +#IF STRING_KEY + createOrIgnoreBytesKeyNullEntry( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + createOrIgnoreBytesKeyNullEntry( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SINGLE_KEY||MULTI_KEY + } else { +#IF LONG_KEY + findOrCreateLongKeyWord( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + aggregate); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKeyWord( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + aggregate); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + findOrCreateBytesKeyWord( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + aggregate); +#ENDIF SINGLE_KEY||MULTI_KEY + if (currentIsAggregationNeeded) { + +#IF LONG_KEY +#IF MIN + // MIN aggregation against LONG key hash table entry. + if (aggregate < currentAggregationWord) { + replaceLongKeyWord(aggregate); + } +#ENDIF MIN +#IF MAX + // MAX aggregation against LONG key hash table entry. + if (aggregate > currentAggregationWord) { + replaceLongKeyWord(aggregate); + } +#ENDIF MAX +#IF SUM + // SUM aggregation against LONG key hash table entry. + replaceLongKeyWord(currentAggregationWord + aggregate); +#ENDIF SUM +#ENDIF LONG_KEY +#IF STRING_KEY||SINGLE_KEY||MULTI_KEY +#IF MIN + // MIN aggregation against BYTES key hash table entry. + if (aggregate < currentAggregationWord) { + replaceBytesKeyWord(aggregate); + } +#ENDIF MIN +#IF MAX + // MAX aggregation against BYTES key hash table entry. + if (aggregate > currentAggregationWord) { + replaceBytesKeyWord(aggregate); + } +#ENDIF MAX +#IF SUM + // SUM aggregation against BYTES key hash table entry. + replaceBytesKeyWord(currentAggregationWord + aggregate); +#ENDIF SUM +#ENDIF STRING_KEY||SINGLE_KEY||MULTI_KEY + } + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- create the hash table entry if necessary; +#COMMENT . All variations. 
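+#COMMENT (A NULL-only series still creates an entry: every key must appear in the output result.)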
+#COMMENT +#BEGIN_LINES WORD_CREATE_OR_IGNORE_KEY_NULL_ENTRY +#IF LONG_KEY + createOrIgnoreLongKeyNullEntry( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); +#ENDIF LONG_KEY +#IF STRING_KEY + createOrIgnoreBytesKeyNullEntry( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SINGLE_KEY||MULTI_KEY + createOrIgnoreBytesKeyNullEntry( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SINGLE_KEY||MULTI_KEY +#END_LINES diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java index 494db35..0daac70 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java @@ -24,7 +24,7 @@ * Represents a set of aggregation buffers to be used for a specific key for UDAF GROUP BY. * */ -public class VectorAggregationBufferRow { +public class VectorAggregationBufferRow extends VectorAggregationBufferRowBase { private VectorAggregateExpression.AggregationBuffer[] aggregationBuffers; private int version; private int index; @@ -33,10 +33,11 @@ public VectorAggregationBufferRow( VectorAggregateExpression.AggregationBuffer[] aggregationBuffers) { this.aggregationBuffers = aggregationBuffers; } - + /** * returns the aggregation buffer for an aggregation expression, by index. */ + @Override public VectorAggregateExpression.AggregationBuffer getAggregationBuffer(int bufferIndex) { return aggregationBuffers[bufferIndex]; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRowBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRowBase.java new file mode 100644 index 0000000..fa51411 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRowBase.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; + +/** + * Represents the base methods needed for a set of aggregation buffers to be used for a + * specific key for vectorized UDAF GROUP BY. + * + */ +public abstract class VectorAggregationBufferRowBase { + + public VectorAggregationBufferRowBase() { + } + + /** + * returns the aggregation buffer for an aggregation expression, by index. 
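+ * Concrete subclasses (e.g. VectorAggregationBufferRow) back this with an array of buffers.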
+ */ + public abstract VectorAggregateExpression.AggregationBuffer getAggregationBuffer(int bufferIndex); +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 55d2a16..e9ae3ec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -729,17 +729,22 @@ private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, return expr; } - public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List exprNodes) - throws HiveException { - VectorExpression[] vecExprs = - getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); + public static void upConvertDecimal64(VectorExpression[] vecExprs, VectorizationContext vContext) + throws HiveException { final int size = vecExprs.length; for (int i = 0; i < size; i++) { VectorExpression vecExpr = vecExprs[i]; if (vecExpr.getOutputColumnVectorType() == ColumnVector.Type.DECIMAL_64) { - vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr); + vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr, vContext); } } + } + + public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List exprNodes) + throws HiveException { + VectorExpression[] vecExprs = + getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); + upConvertDecimal64(vecExprs, this); return vecExprs; } @@ -1688,9 +1693,11 @@ private VectorExpression createDecimal64VectorExpression(Class vectorClass, * The instantiateExpression method sets the output column and type information. */ VectorExpression vectorExpression = - instantiateExpression(vectorClass, returnTypeInfo, returnDataTypePhysicalVariation, arguments); + instantiateExpression( + vectorClass, returnTypeInfo, DataTypePhysicalVariation.DECIMAL_64, this, + arguments); if (vectorExpression == null) { - handleCouldNotInstantiateVectorExpression(vectorClass, returnTypeInfo, returnDataTypePhysicalVariation, arguments); + handleCouldNotInstantiateVectorExpression(vectorClass, returnTypeInfo, DataTypePhysicalVariation.DECIMAL_64, arguments); } vectorExpression.setInputTypeInfos(typeInfos); @@ -1796,8 +1803,9 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, return createVectorExpression(vclass, childExpr, childrenMode, returnType); } - private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeInfo resultTypeInfo) - throws HiveException { + private static VectorExpression createDecimal64ToDecimalConversion(int colIndex, + TypeInfo resultTypeInfo, VectorizationContext vContext) + throws HiveException { Object [] conversionArgs = new Object[1]; conversionArgs[0] = colIndex; VectorExpression vectorExpression = @@ -1805,6 +1813,7 @@ private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeIn ConvertDecimal64ToDecimal.class, resultTypeInfo, DataTypePhysicalVariation.NONE, + vContext, conversionArgs); if (vectorExpression == null) { handleCouldNotInstantiateVectorExpression( @@ -1831,17 +1840,18 @@ public void wrapWithDecimal64ToDecimalConversions(VectorExpression[] vecExprs) vecExpr.getOutputDataTypePhysicalVariation(); if (outputDataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { vecExprs[i] = - wrapWithDecimal64ToDecimalConversion(vecExpr); + wrapWithDecimal64ToDecimalConversion(vecExpr, this); } } } } - public VectorExpression 
wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression) + public static VectorExpression wrapWithDecimal64ToDecimalConversion( + VectorExpression inputExpression, VectorizationContext vContext) throws HiveException { VectorExpression wrapExpression = createDecimal64ToDecimalConversion( - inputExpression.getOutputColumnNum(), inputExpression.getOutputTypeInfo()); + inputExpression.getOutputColumnNum(), inputExpression.getOutputTypeInfo(), vContext); if (inputExpression instanceof IdentityExpression) { return wrapExpression; } @@ -1887,11 +1897,14 @@ private VectorExpression createVectorExpression(Class vectorClass, // In this method, we must only process non-Decimal64 column vectors. // Convert Decimal64 columns to regular decimal. - DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex); - if (dataTypePhysicalVariation != null && dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { + DataTypePhysicalVariation dataTypePhysicalVariation = + getDataTypePhysicalVariation(colIndex); + if (dataTypePhysicalVariation != null && + dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { // FUTURE: Can we reuse this conversion? - VectorExpression vChild = createDecimal64ToDecimalConversion(colIndex, childTypeInfo); + VectorExpression vChild = + createDecimal64ToDecimalConversion(colIndex, childTypeInfo, this); children.add(vChild); arguments[i] = vChild.getOutputColumnNum(); @@ -1920,7 +1933,10 @@ private VectorExpression createVectorExpression(Class vectorClass, throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); } } - VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments); + VectorExpression vectorExpression = + instantiateExpression( + vectorClass, returnType, DataTypePhysicalVariation.NONE, this, + arguments); if (vectorExpression == null) { handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments); } @@ -1939,7 +1955,7 @@ private VectorExpression createVectorExpression(Class vectorClass, return vectorExpression; } - private void handleCouldNotInstantiateVectorExpression(Class vectorClass, TypeInfo returnType, + private static void handleCouldNotInstantiateVectorExpression(Class vectorClass, TypeInfo returnType, DataTypePhysicalVariation dataTypePhysicalVariation, Object[] arguments) throws HiveException { String displayString = "Could not instantiate vector expression class " + vectorClass.getName() + " for arguments " + Arrays.toString(arguments) + " return type " + @@ -1954,7 +1970,7 @@ private void handleCouldNotInstantiateVectorExpression(Class vectorClass, Typ return VectorExpressionDescriptor.Mode.PROJECTION; } - private String getNewInstanceArgumentString(Object [] args) { + private static String getNewInstanceArgumentString(Object [] args) { if (args == null) { return "arguments: NULL"; } @@ -1994,8 +2010,9 @@ public static String getStackTraceAsSingleLine(Throwable e) { return cleaned; } - public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTypeInfo, - DataTypePhysicalVariation returnDataTypePhysicalVariation, Object...args) + public static VectorExpression instantiateExpression(Class vclass, TypeInfo returnTypeInfo, + DataTypePhysicalVariation returnDataTypePhysicalVariation, VectorizationContext vContext, + Object...args) throws HiveException { VectorExpression ve = null; Constructor ctor = getConstructor(vclass); @@ 
-2005,15 +2022,19 @@ public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTy try { ve = (VectorExpression) ctor.newInstance(); } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + - getStackTraceAsSingleLine(ex)); + throw new HiveException( + "Could not instantiate " + vclass.getSimpleName() + + " with 0 arguments" + + ", exception: " + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength) { try { ve = (VectorExpression) ctor.newInstance(args); } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + - getStackTraceAsSingleLine(ex)); + throw new HiveException( + "Could not instantiate " + vclass.getSimpleName() + + " with " + getNewInstanceArgumentString(args) + + ", exception: " + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength + 1) { // Additional argument is needed, which is the outputcolumn. @@ -2028,7 +2049,7 @@ public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTy // Special handling for decimal because decimal types need scale and precision parameter. // This special handling should be avoided by using returnType uniformly for all cases. final int outputColumnNum = - ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation); + vContext.ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation); newArgs = Arrays.copyOf(args, numParams); newArgs[numParams-1] = outputColumnNum; @@ -2042,8 +2063,10 @@ public VectorExpression instantiateExpression(Class vclass, TypeInfo returnTy ve.setOutputDataTypePhysicalVariation(returnDataTypePhysicalVariation); } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + - getStackTraceAsSingleLine(ex)); + throw new HiveException( + "Could not instantiate " + vclass.getSimpleName() + + " with arguments " + getNewInstanceArgumentString(newArgs) + + ", exception: " + getStackTraceAsSingleLine(ex)); } } // Add maxLength parameter to UDFs that have CHAR or VARCHAR output. @@ -2891,7 +2914,7 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { // Do Decimal64 conversion instead. - return createDecimal64ToDecimalConversion(colIndex, returnType); + return createDecimal64ToDecimalConversion(colIndex, returnType, this); } else { return createVectorExpression(CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); @@ -3895,7 +3918,7 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio return ts; } - private Constructor getConstructor(Class cl) throws HiveException { + private static Constructor getConstructor(Class cl) throws HiveException { try { Constructor [] ctors = cl.getDeclaredConstructors(); if (ctors.length == 1) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java new file mode 100644 index 0000000..cd883d6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.WordAggregate;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+
+/**
+ * This class is the common operator class of the Native Vectorized GroupBy operators; it holds
+ * the common initialization logic.
+ */
+public abstract class VectorGroupByCommon
+ extends Operator
+ implements VectorizationContextRegion, VectorizationOperator {
+
+ private static final long serialVersionUID = 1L;
+
+ protected VectorGroupByDesc vectorDesc;
+
+ protected VectorGroupByInfo vectorGroupByInfo;
+
+ protected VectorizationContext vContext;
+
+ // Create a new outgoing vectorization context because column name map will change.
+ protected VectorizationContext vOutContext;
+
+ protected VectorExpression[] groupByKeyExpressions;
+
+ protected VectorAggregationDesc[] vectorAggregationDescs;
+
+ protected AggregationVariation aggregationVariation;
+
+ protected CountAggregate countAggregate;
+ protected WordAggregate wordAggregate;
+
+ // The above members are initialized by the constructor and must not be
+ // transient.
+ //---------------------------------------------------------------------------
+
+
+ // For debug tracing: the name of the map or reduce task.
+ protected transient String taskName;
+
+ // Debug display.
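+ // (batchCounter counts input batches; closeOp logs it when the operator closes.)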
+ protected transient long batchCounter; + + public VectorGroupByCommon() { + super(); + } + + public static int INT_PER_LONG_COUNT = Long.SIZE / Integer.SIZE; + + public VectorGroupByCommon(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx); + + GroupByDesc desc = (GroupByDesc) conf; + this.conf = desc; + this.vectorDesc = (VectorGroupByDesc) vectorDesc; + vectorGroupByInfo = this.vectorDesc.getVectorGroupByInfo(); + + this.vContext = vContext; + + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); + + groupByKeyExpressions = this.vectorDesc.getKeyExpressions(); + + vectorAggregationDescs = this.vectorDesc.getVecAggrDescs(); + + aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + + countAggregate = vectorGroupByInfo.getCountAggregation(); + wordAggregate = vectorGroupByInfo.getWordAggregation(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + // Determine the name of our map or reduce task for debug tracing. + BaseWork work = Utilities.getMapWork(hconf); + if (work == null) { + work = Utilities.getReduceWork(hconf); + } + if (work == null) { + taskName = "none"; + } else { + taskName = work.getName(); + } + + batchCounter = 0; + } + + /** + * Implements the getName function for the Node Interface. + * + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + public static String getOperatorName() { + return "GBY"; + } + + @Override + public VectorizationContext getOutputVectorizationContext() { + return vOutContext; + } + + @Override + public VectorizationContext getInputVectorizationContext() { + return vContext; + } + + @Override + public VectorDesc getVectorDesc() { + return vectorDesc; + } + + @Override + public OperatorType getType() { + return OperatorType.GROUPBY; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java new file mode 100644 index 0000000..1497f99 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +/** + * This class is common operator class of Native Vectorized GroupBy for output generation. + * Taking the aggregations and filling up the output batch. + */ +public abstract class VectorGroupByCommonOutput + extends VectorGroupByCommon { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient VectorizedRowBatch outputBatch; + + private transient VectorizedRowBatchCtx vrbCtx; + + private transient TypeInfo[] outputTypeInfos; + private transient DataTypePhysicalVariation[] outputDataTypePhysicalVariations; + + private transient StandardStructObjectInspector standardOutputObjInspector; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByCommonOutput() { + super(); + } + + public VectorGroupByCommonOutput(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + List objectInspectors = new ArrayList(); + + List outputFieldNames = conf.getOutputColumnNames(); + + final int keyCount = (groupByKeyExpressions == null ? 0 : groupByKeyExpressions.length); + final int aggrCount = (vectorAggregationDescs == null ? 
0 : vectorAggregationDescs.length); + final int outputCount = keyCount + aggrCount; + outputTypeInfos = new TypeInfo[outputCount]; + outputDataTypePhysicalVariations = new DataTypePhysicalVariation[outputCount]; + int outputTypesIndex = 0; + + for(int i = 0; i < keyCount; ++i) { + VectorExpression keyExpression = groupByKeyExpressions[i]; + TypeInfo outputTypeInfo = keyExpression.getOutputTypeInfo(); + outputTypeInfos[outputTypesIndex] = outputTypeInfo; + DataTypePhysicalVariation outputDataTypePhysicalVariation = + keyExpression.getOutputDataTypePhysicalVariation(); + outputDataTypePhysicalVariations[outputTypesIndex++] = outputDataTypePhysicalVariation; + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + outputTypeInfo); + objectInspectors.add(objInsp); + } + + for(int i = 0; i < aggrCount; ++i) { + VectorAggregationDesc vecAggrDesc = vectorAggregationDescs[i]; + TypeInfo outputTypeInfo = vecAggrDesc.getOutputTypeInfo(); + outputTypeInfos[outputTypesIndex] = outputTypeInfo; + outputDataTypePhysicalVariations[outputTypesIndex++] = + vecAggrDesc.getOutputDataTypePhysicalVariation(); + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo); + objectInspectors.add(objInsp); + } + + standardOutputObjInspector = + ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors); + outputObjInspector = standardOutputObjInspector; + + /** + * Setup the output batch and vectorization context for downstream operators. + */ + vrbCtx = new VectorizedRowBatchCtx( + conf.getOutputColumnNames().toArray(new String[0]), + outputTypeInfos, + outputDataTypePhysicalVariations, + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + vOutContext.getScratchColumnTypeNames(), + vOutContext.getScratchDataTypePhysicalVariations()); + + outputBatch = vrbCtx.createVectorizedRowBatch(); + } + + public void forwardOutputBatch(VectorizedRowBatch outputBatch) throws HiveException { + + forward(outputBatch, null); + + outputBatch.reset(); + } + + /** + * Copy all of the keys and aggregations to the output batch. + */ + protected abstract void outputGroupBy() throws HiveException; + + protected void flushGroupBy() throws HiveException { + outputGroupBy(); + if (outputBatch.size > 0) { + forwardOutputBatch(outputBatch); + } + } + + /** + * On close, make sure a partially filled overflow batch gets forwarded. + */ + @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + if (!aborted) { + flushGroupBy(); + } + LOG.debug("VectorGroupByCommonOutputOperator closeOp " + batchCounter + " batches processed"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java new file mode 100644 index 0000000..8ef9c52 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.VectorGroupByCommonOutput; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is common hash operator class of Native Vectorized GroupBy for hash related + * initialization logic. + */ +public abstract class VectorGroupByHashCommon + extends VectorGroupByCommonOutput { + + private static final long serialVersionUID = 1L; + private static final String CLASS_NAME = VectorGroupByHashCommon.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient long hashGroupByMemoryAvailableByteLength; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashCommon() { + super(); + } + + public VectorGroupByHashCommon(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + final float memoryPercentage = conf.getGroupByMemoryUsage(); + final int testMaxMemoryAvailable = vectorGroupByInfo.getTestGroupByMaxMemoryAvailable(); + final long maxMemoryAvailable = + (testMaxMemoryAvailable == -1 ? + conf.getMaxMemoryAvailable() : testMaxMemoryAvailable); + hashGroupByMemoryAvailableByteLength = (long) (memoryPercentage * maxMemoryAvailable); + } + + /* + * Return the power of 2 that is equal to or next below a value. + * + * Example: + * 100000b = 2^5 = 32 + * where Long.numberOfLeadingZeros returns (64 - 6) = 58 + * and the result = 5. + * + * Replacing any set of lower 0's with 1's doesn't change the result. + * Or, numbers 32 to 63 return 5. 
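+ * More sample values (verifiable against Long.numberOfLeadingZeros):
+ * floorPowerOf2(63) = 5, floorPowerOf2(64) = 6, floorPowerOf2(65) = 6.
+ * A zero argument is special-cased below to return 0.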
+ * + */ + public static int floorPowerOf2(long a) { + if (a == 0) { + return 0; + } + final int floorLeadingZerosCount = Long.numberOfLeadingZeros(a); + final int result = Long.SIZE - floorLeadingZerosCount - 1; + return result; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java new file mode 100644 index 0000000..e20d248 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java @@ -0,0 +1,202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * This class is common hash operator class of Native Vectorized GroupBy with common operator + * logic for checking key limits and the common process method logic. + */ +public abstract class VectorGroupByHashOperatorBase + extends VectorGroupByHashTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
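+ //
+ // Overall per-batch flow implemented by process() below: 1) evaluate the GROUP BY key
+ // expressions into scratch columns, 2) doBeforeMainLoopWork checks the worst-case key
+ // limit (and may flush and recreate the hash table), 3) doMainLoop runs the specialized
+ // find-or-create and aggregation logic of the concrete table class.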
+ // + + public VectorGroupByHashOperatorBase() { + super(); + } + + public VectorGroupByHashOperatorBase(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + determineInitialHashTableSize(); + + allocateHashTable(); + } + + protected void doBeforeMainLoopWork(final int inputLogicalSize) + throws HiveException, IOException { + + /* + * If the hash table has less than the worst-case inputLogicalSize keys that + * could be added, then flush the current hash table entries and clear it. + */ + checkKeyLimitOncePerBatch(inputLogicalSize); + } + + protected abstract void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException; + + /* + * Common process method that does common work then drives the specialized Operator classes with + * the doBeforeMainLoopWork and doMainLoop overrides. + */ + @Override + public void process(Object row, int tag) throws HiveException { + + try { + VectorizedRowBatch batch = (VectorizedRowBatch) row; + + batchCounter++; + + final int inputLogicalSize = batch.size; + + if (inputLogicalSize == 0) { + return; + } + + /* + * Perform any key expressions. Results will go into scratch columns. + */ + if (groupByKeyExpressions != null) { + for (VectorExpression ve : groupByKeyExpressions) { + ve.evaluate(batch); + } + } + + doBeforeMainLoopWork(inputLogicalSize); + + doMainLoop(batch, inputLogicalSize); + + } catch (Exception e) { + throw new HiveException(e); + } + } + + protected void outputCountForNullSingleKey(ColumnVector keyColumnVector, + LongColumnVector countColumnVector, long nullKeyCount) + throws HiveException { + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + + countColumnVector.isNull[nullBatchIndex] = false; + countColumnVector.vector[nullBatchIndex] = nullKeyCount; + + outputBatch.size++; + } + + protected void outputAggregateForNullSingleKey(ColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector, boolean isNullKeyAggregateNull, + long nullKeyAggregate) + throws HiveException { + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + + aggregateColumnVector.isNull[nullBatchIndex] = isNullKeyAggregateNull; + aggregateColumnVector.vector[nullBatchIndex] = nullKeyAggregate; + + outputBatch.size++; + } + + protected void outputAggregateForNullSingleKey(ColumnVector keyColumnVector, + DoubleColumnVector aggregateColumnVector, boolean isNullKeyAggregateNull, + double nullKeyAggregate) + throws HiveException { + + // Is the outputBatch already full? 
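+ // If so, forward it downstream first to make room for the NULL-key row.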
+ if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + + aggregateColumnVector.isNull[nullBatchIndex] = isNullKeyAggregateNull; + aggregateColumnVector.vector[nullBatchIndex] = nullKeyAggregate; + + outputBatch.size++; + } + + protected void outputCountForNullMultiKey(long nullKeyCount) + throws HiveException { + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int keySize = groupByKeyExpressions.length; + final int nullBatchIndex = outputBatch.size; + for (int i = 0; i < keySize; i++) { + ColumnVector keyColumnVector = outputBatch.cols[i]; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + } + + LongColumnVector countKeyColumnVector = (LongColumnVector) outputBatch.cols[keySize]; + countKeyColumnVector.isNull[nullBatchIndex] = false; + countKeyColumnVector.vector[nullBatchIndex] = nullKeyCount; + + outputBatch.size++; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java new file mode 100644 index 0000000..a9779db --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java @@ -0,0 +1,350 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is common hash table class of Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashTable + extends VectorGroupByHashCommon { + + private static final long serialVersionUID = 1L; + + private static final String CLASS_NAME = VectorGroupByHashTable.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + private boolean isBytesHashTable; + + // The above members are initialized by the constructor and must not be + // transient. 
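+ //
+ // Worked example of the memory budget (hypothetical numbers): with
+ // conf.getGroupByMemoryUsage() = 0.5 and maxMemoryAvailable = 1 GiB,
+ // hashGroupByMemoryAvailableByteLength = 512 MiB; for bytes keys,
+ // divvyUpHashGroupByMemory below splits that evenly: 256 MiB for the slot table
+ // and 256 MiB for the key store.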
+ //--------------------------------------------------------------------------- + + // How many times we encountered a limit on the hash table and had to flush and recreate. + private long flushAndRecreateCount; + + // Memory available in bytes for the slot table, and when we have bytes keys, the memory available + // for the key store. + protected transient long hashTableMemoryAvailableByteLength; + protected transient long keyStoreMemoryAvailableByteLength; + + // The logical size and power of 2 mask of the hash table + protected transient int logicalHashBucketCount; + protected transient int logicalHashBucketMask; + + // The number of longs in the hash table slot array. It is the logical size * entries per slot. + protected int slotPhysicalArraySize; + + // The maximum number of keys we'll keep in the hash table before flushing. + protected transient int hashTableKeyCountLimit; + + // The slot table with 1, 2, 3, etc longs per entry. + protected transient long[] slotMultiples; + + // The key count and largest number of misses in our quadratic probing style hash table. + // Maintained by the hash table variations. + protected transient int keyCount; + protected transient int largestNumberOfSteps; + + // Byte length for WriteBuffers segments in the VectorKeyStore used for bytes keys + protected transient int keyStoreByteSize; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashTable() { + super(); + } + + public VectorGroupByHashTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + isBytesHashTable = + (this.vectorDesc.getVectorGroupByInfo().getHashTableKeyType() != HashTableKeyType.LONG); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + flushAndRecreateCount = 0; + + divvyUpHashGroupByMemory(); + } + + public long getFlushAndStartOverCount() { + return flushAndRecreateCount; + } + + public abstract int getHashTableMultiple(); + + /* + * Decide how to apportion memory for the slot table, and for the key store when we have bytes + * keys. (Single long keys are stored in the slot table). + */ + private void divvyUpHashGroupByMemory() { + + /* + * CONCERN: + * Do we really want a hash table to use the maximum supplied memory immediately? + * That could waste memory that other operators could use. And, cause Java GC + * issues because of how large the single slot table array is. Large hash tables + * with small keys sets could cause lots of unnecessary cold RAM hits. There is a tension + * here, of course. Too small a table and there will be more insert collisions. + * + * In contrast, the current VectorGroupByOperator and GroupByOperator classes use a + * Java HeapMap which automatically grows over time. + * + * The issues here are similar to MapJoin, except we have the possibility of using a smaller + * hash table and flushing everything to Reduce. Then, creating a larger slot table instead + * of zeroing the current one. MapJoin cannot flush -- it either needs to expand its + * hash tables to hold everything or spill some of the data to secondary storage (Hybrid Grace). + */ + + if (isBytesHashTable) { + + // UNDONE: Use key size estimates to make better decision than half... 
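+ // For now, split the group-by budget evenly between the slot table and the key store.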
+ final long half = hashGroupByMemoryAvailableByteLength / 2;
+ hashTableMemoryAvailableByteLength = half;
+ keyStoreMemoryAvailableByteLength = half;
+ } else {
+ hashTableMemoryAvailableByteLength = hashGroupByMemoryAvailableByteLength;
+ keyStoreMemoryAvailableByteLength = 0;
+ }
+ }
+
+ //------------------------------------------------------------------------------------------------
+
+ private static final int LARGEST_NUMBER_OF_STEPS_THRESHOLD = 6;
+
+ public boolean isAboveLargestNumberOfStepsThresold() {
+ return (largestNumberOfSteps > LARGEST_NUMBER_OF_STEPS_THRESHOLD);
+ }
+
+ /*
+ * Override this method in specialized hash tables that have more to initialize and/or create.
+ */
+ public void allocateHashTable() throws HiveException {
+ allocateBucketArray();
+ }
+
+ /*
+ * Allocate the key store when we have bytes keys.
+ */
+ public VectorKeyStore allocateVectorKeyStore(VectorKeyStore keyStore) {
+ if (keyStore == null) {
+ return new VectorKeyStore(keyStoreByteSize);
+ } else {
+ keyStore.clear();
+ return keyStore;
+ }
+ }
+
+ /*
+ * When flushing and recreating, release the memory when the slot table is changing size, etc.
+ */
+ public void releaseHashTableMemory() throws HiveException {
+ if (slotMultiples.length == slotPhysicalArraySize) {
+
+ // Keep it and clear it later.
+ return;
+ }
+ slotMultiples = null;
+ }
+
+ // Java arrays are limited to int indexing with a maximum length slightly below 2^31, so the
+ // largest usable power of 2 is one less than the number of non-sign Integer bits:
+ // 2^30 = 1,073,741,824.
+ private static final int MAX_POWER_OF_2_FOR_INT_INDEXING = Integer.SIZE - 2;
+
+ // An arbitrary factor to divide the slot table size by to get the key count limit.
+ // Hitting the key count limit will cause the hash table to be flushed to Reduce and cleared
+ // for refilling.
+ private static final int KEY_COUNT_FACTOR = 8;
+
+ // Make sure we have comfortable room for at least one batch of new keys to support the
+ // VectorGroupByHashOperatorBase.checkKeyLimitOncePerBatch method. With a DEFAULT_SIZE of
+ // 1024, this works out to 1024 * 8 * 8 = 65,536 bytes = 2^16, i.e. a floor of 8,192 logical
+ // buckets and a key count limit of 1,024 -- room for one full batch.
+ private static final int MIN_HASH_TABLE_BYTE_LENGTH =
+ VectorizedRowBatch.DEFAULT_SIZE * KEY_COUNT_FACTOR * (Long.SIZE / Byte.SIZE);
+ private static final int MIN_POWER_OF_2 = floorPowerOf2(MIN_HASH_TABLE_BYTE_LENGTH);
+
+ /*
+ * Determine the size for the slot table and, for bytes keys, the key store.
+ */
+ public void determineInitialHashTableSize() throws HiveException {
+
+ /*
+ * Slot table size.
+ */
+
+ final int multiple = getHashTableMultiple();
+
+ // Take our multiple into account.
+ final int floorPowerOf2MaxHashTableMemoryByteLength =
+ floorPowerOf2(hashTableMemoryAvailableByteLength / multiple);
+
+ // No matter how much memory they want to give us, our array is limited to int indexing.
+ int maxPowerOf2HashTableMemoryByteLength =
+ Math.min(floorPowerOf2MaxHashTableMemoryByteLength, MAX_POWER_OF_2_FOR_INT_INDEXING);
+
+ // UNDONE: Artificially limit for now... 2^24 = 16,777,216 bytes.
+ maxPowerOf2HashTableMemoryByteLength = Math.min(maxPowerOf2HashTableMemoryByteLength, 24);
+
+ final int powerOf2HashTableMemoryByteLength =
+ Math.max(maxPowerOf2HashTableMemoryByteLength, MIN_POWER_OF_2);
+
+ final int hashTableByteSize = (1 << powerOf2HashTableMemoryByteLength);
+ final int hashTableLongSize = hashTableByteSize / (Long.SIZE / Byte.SIZE);
+
+ logicalHashBucketCount = hashTableLongSize;
+
+ slotPhysicalArraySize = logicalHashBucketCount * multiple;
+
+ /*
+ * Key store size.
+ */
+
+ if (isBytesHashTable) {
+ final int floorPowerOf2MaxKeyStoreMemoryByteLength =
+ floorPowerOf2(keyStoreMemoryAvailableByteLength);
+
+ // No matter how much memory they want to give us, our array is limited to int indexing.
+ int maxPowerOf2KeyStoreMemoryByteLength =
+ Math.min(floorPowerOf2MaxKeyStoreMemoryByteLength, MAX_POWER_OF_2_FOR_INT_INDEXING);
+
+ keyStoreByteSize = (1 << maxPowerOf2KeyStoreMemoryByteLength);
+
+ // CONSIDER: Better min/max limits.
+ keyStoreByteSize = Math.min(keyStoreByteSize, 1024 * 1024);
+ keyStoreByteSize = Math.max(keyStoreByteSize, 128 * 1024);
+ }
+
+ if (!isBytesHashTable) {
+ LOG.info(
+ "Logical slot table size " + logicalHashBucketCount +
+ " multiple " + multiple);
+ } else {
+ LOG.info(
+ "Logical slot table size " + logicalHashBucketCount +
+ " multiple " + multiple +
+ " key store size " + keyStoreByteSize);
+ }
+ }
+
+ /*
+ * When flushing and recreating, determine the slot table size to use for the next round.
+ */
+ public void determineNextHashTableSize() throws HiveException {
+ // CONSIDER: Growing the hash table size upon examining current hash table.
+ }
+
+ /*
+ * For now, we are just allocating the slot table array.
+ * FUTURE: We'll need to revisit these calculations when we support STRING keys.
+ */
+ protected void allocateBucketArray() {
+ if (slotMultiples != null) {
+
+ // The releaseHashTableMemory method kept the same size array, so just clear it.
+ Arrays.fill(slotMultiples, 0);
+ } else {
+
+ logicalHashBucketMask = logicalHashBucketCount - 1;
+
+ hashTableKeyCountLimit = logicalHashBucketCount / KEY_COUNT_FACTOR;
+
+ slotMultiples = new long[slotPhysicalArraySize];
+ }
+
+ keyCount = 0;
+ largestNumberOfSteps = 0;
+
+ if (flushAndRecreateCount != 0) {
+ LOG.info("Flush and recreate #" + flushAndRecreateCount);
+ }
+ }
+
+ /*
+ * Check the worst case possibility -- adding a new key for each row in the batch -- and,
+ * if the limit could be exceeded, flush and recreate the hash table.
+ */
+ protected void checkKeyLimitOncePerBatch(final int inputLogicalSize)
+ throws HiveException, IOException {
+
+ /*
+ * Check the hash table key limit against the worst case of adding all keys; doing this
+ * once per batch keeps the check out of the inner loop for better performance.
+ */
+ final boolean isReachedKeyLimit =
+ (keyCount + inputLogicalSize > hashTableKeyCountLimit);
+ if (isReachedKeyLimit || isAboveLargestNumberOfStepsThresold()) {
+ LOG.info(
+ "Reached key limit " + isReachedKeyLimit +
+ ", above largest number of steps threshold " + isAboveLargestNumberOfStepsThresold());
+
+ flushAndRecreateCount++;
+ flushAndRecreate();
+ if (keyCount + inputLogicalSize > hashTableKeyCountLimit) {
+
+ // Hash table is way too small.
+ raise2ndHitOutOfStorage();
+ }
+ }
+ }
+
+ protected void raise2ndHitOutOfStorage() throws HiveException {
+ throw new HiveException(
+ "After flushing and clearing the hash table, there still isn't enough storage");
+ }
+
+ protected void flushAndRecreate() throws HiveException, IOException {
+
+ /*
+ * 1) Flush hash table.
+ * 2) Use current state to determine next sizes.
+ * 3) Release memory, if necessary.
+ * 4) Recreate/clear using next sizes.
+ */
+
+ flushGroupBy();
+
+ // Based on current hash table sizes and perhaps historical information, determine
+ // the size to use next during recreation.
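+ // (Currently a no-op placeholder; see determineNextHashTableSize above.)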
+ determineNextHashTableSize(); + + releaseHashTableMemory(); + + allocateHashTable(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/count/VectorGroupByHashBytesKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/count/VectorGroupByHashBytesKeyCountTable.java new file mode 100644 index 0000000..799427d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/count/VectorGroupByHashBytesKeyCountTable.java @@ -0,0 +1,189 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.count; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/** + * A single bytes key hash table optimized for a single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashBytesKeyCountTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorKeyStore keyStore; + private transient WriteBuffers writeBuffers; + protected WriteBuffers.Position readPos; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
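+ //
+ // Slot layout used by this table: each logical bucket is a [keyRef, count] pair of
+ // longs (getHashTableMultiple() == BYTES_ENTRY_SIZE == 2).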
+ // + + public VectorGroupByHashBytesKeyCountTable() { + super(); + } + + public VectorGroupByHashBytesKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + writeBuffers = keyStore.getWriteBuffers(); + readPos = new WriteBuffers.Position(); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + writeBuffers = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_ENTRY_SIZE; + } + + protected static final int BYTES_ENTRY_SIZE = 2; + + public void findOrCreateBytesKeyCount(byte[] keyBytes, int keyStart, int keyLength, + long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int pairIndex; + boolean isNewKey; + long refWord; + final long partialHashCode = + VectorHashKeyRef.extractPartialHashCode(hashCode); + while (true) { + pairIndex = 2 * slot; + refWord = slotMultiples[pairIndex]; + if (refWord == 0) { + isNewKey = true; + break; + } + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + partialHashCode && + VectorHashKeyRef.equalKey( + refWord, keyBytes, keyStart, keyLength, writeBuffers, readPos)) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + + // First entry. + slotMultiples[pairIndex] = keyStore.add(partialHashCode, keyBytes, keyStart, keyLength); + slotMultiples[pairIndex + 1] = count; + + keyCount++; + + } else if (count > 0) { + + slotMultiples[pairIndex + 1] += count; + } + } + + private int countKeyTripleIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + private long currentCountKeyCount; + + protected int initBytesKeyIterator() { + countKeyTripleIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + currentCountKeyCount = 0; + return keyCount; + } + + // Read next key. 
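+ // Entries are [keyRef, count] pairs, so the iterator advances two longs at a time,
+ // skipping empty (zero reference word) slots; callers must invoke readNext() no more
+ // than the keyCount returned by initBytesKeyIterator() times.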
+ protected void readNext() {
+ while (true) {
+ final long keyRef = slotMultiples[countKeyTripleIndex];
+ if (keyRef != 0) {
+ keyStore.getKey(
+ keyRef,
+ keyByteSegmentRef,
+ keyReadPos);
+ currentCountKeyCount = slotMultiples[countKeyTripleIndex + 1];
+
+ countKeyTripleIndex += 2;
+ return;
+ }
+ countKeyTripleIndex += 2;
+ }
+ }
+
+ public byte[] getKeyBytes() {
+ return keyByteSegmentRef.getBytes();
+ }
+
+ public int getKeyBytesOffset() {
+ return (int) keyByteSegmentRef.getOffset();
+ }
+
+ public int getKeyBytesLength() {
+ return keyByteSegmentRef.getLength();
+ }
+
+ public long getCount() {
+ return currentCountKeyCount;
+ }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/duplicatereduction/VectorGroupByHashBytesKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/duplicatereduction/VectorGroupByHashBytesKeyDuplicateReductionTable.java
new file mode 100644
index 0000000..89b2ba9
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/duplicatereduction/VectorGroupByHashBytesKeyDuplicateReductionTable.java
@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase;
+import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef;
+import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
+
+/*
+ * A single bytes key hash table optimized for duplicate reduction Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashBytesKeyDuplicateReductionTable
+ extends VectorGroupByHashOperatorBase {
+
+ private static final long serialVersionUID = 1L;
+
+ // The above members are initialized by the constructor and must not be
+ // transient.
+ //---------------------------------------------------------------------------
+
+ private transient VectorKeyStore keyStore;
+ private transient WriteBuffers writeBuffers;
+ protected WriteBuffers.Position readPos;
+
+ //---------------------------------------------------------------------------
+ // Pass-thru constructors.
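+ //
+ // Unlike the count table, each bucket here holds a single reference word (entry
+ // size 1): the table only records that a key was seen, which is all duplicate
+ // reduction needs before re-emitting the distinct keys.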
+ // + + public VectorGroupByHashBytesKeyDuplicateReductionTable() { + super(); + } + + public VectorGroupByHashBytesKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + writeBuffers = keyStore.getWriteBuffers(); + readPos = new WriteBuffers.Position(); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + writeBuffers = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_DUPLICATE_REDUCTION_ENTRY_SIZE; + } + + protected static final int BYTES_DUPLICATE_REDUCTION_ENTRY_SIZE = 1; + + public void createOrIgnoreBytesDuplicateReductionKey(byte[] keyBytes, int keyStart, int keyLength, + long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + long refWord; + final long partialHashCode = + VectorHashKeyRef.extractPartialHashCode(hashCode); + while (true) { + refWord = slotMultiples[slot]; + if (refWord == 0) { + // First entry. + slotMultiples[slot] = keyStore.add(partialHashCode, keyBytes, keyStart, keyLength); + + keyCount++; + return; + } + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + partialHashCode && + VectorHashKeyRef.equalKey( + refWord, keyBytes, keyStart, keyLength, writeBuffers, readPos)) { + // Ignore. A duplicate has been eliminated. + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + } + + private int iterateIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + + protected int initBytesKeyIterator() { + iterateIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + return keyCount; + } + + // Read next key. + protected void readNext() { + while (true) { + final long keyRef = slotMultiples[iterateIndex]; + if (keyRef != 0) { + keyStore.getKey( + keyRef, + keyByteSegmentRef, + keyReadPos); + + iterateIndex++; + return; + } + iterateIndex++; + } + } + + public byte[] getKeyBytes() { + return keyByteSegmentRef.getBytes(); + } + + public int getKeyBytesOffset() { + return (int) keyByteSegmentRef.getOffset(); + } + + public int getKeyBytesLength() { + return keyByteSegmentRef.getLength(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/word/VectorGroupByHashBytesKeyWordTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/word/VectorGroupByHashBytesKeyWordTable.java new file mode 100644 index 0000000..da47a6b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/word/VectorGroupByHashBytesKeyWordTable.java @@ -0,0 +1,249 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.word;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.word.VectorGroupByHashLongKeyWordTable.FlagsWord;
+import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef;
+import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef;
+import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
+
+/**
+ * A single bytes key hash table optimized for a single aggregation word Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashBytesKeyWordTable
+ extends VectorGroupByHashOperatorBase {
+
+ private static final long serialVersionUID = 1L;
+
+ // The above members are initialized by the constructor and must not be
+ // transient.
+ //---------------------------------------------------------------------------
+
+ // Variables from the most recent findOrCreateBytesKeyWord / createOrIgnoreBytesKeyNullEntry call:
+ protected boolean currentIsAggregationNeeded;
+ protected int currentPairIndex;
+ protected long currentAggregationWord;
+
+ private VectorKeyStore keyStore;
+ private WriteBuffers writeBuffers;
+ private WriteBuffers.Position readPos;
+
+ //---------------------------------------------------------------------------
+ // Pass-thru constructors.
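+ //
+ // NULL-flag convention used below (the actual masks live in VectorHashKeyRef.KeyRef.Flag):
+ // refWord | flagOnMask marks "key created from a NULL-aggregate row", refWord & flagOffMask
+ // clears that mark once a real aggregation word arrives, and KeyRef.getFlag(refWord) tests
+ // it during probing and iteration.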
+ // + + public VectorGroupByHashBytesKeyWordTable() { + super(); + } + + public VectorGroupByHashBytesKeyWordTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + writeBuffers = keyStore.getWriteBuffers(); + readPos = new WriteBuffers.Position(); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + writeBuffers = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_ENTRY_SIZE; + } + + protected static final int BYTES_ENTRY_SIZE = 2; + + public void createOrIgnoreBytesKeyNullEntry(byte[] keyBytes, int keyStart, int keyLength, + long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int pairIndex; + long refWord; + final long partialHashCode = + VectorHashKeyRef.extractPartialHashCode(hashCode); + while (true) { + pairIndex = 2 * slot; + refWord = slotMultiples[pairIndex]; + if (refWord == 0) { + + // Create. + refWord = keyStore.add(partialHashCode, keyBytes, keyStart, keyLength); + slotMultiples[pairIndex] = refWord | KeyRef.Flag.flagOnMask; + + currentIsAggregationNeeded = false; + + keyCount++; + return; + } + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + partialHashCode && + VectorHashKeyRef.equalKey( + refWord, keyBytes, keyStart, keyLength, writeBuffers, readPos)) { + + // Ignore. + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + } + + public void findOrCreateBytesKeyWord(byte[] keyBytes, int keyStart, int keyLength, + long hashCode, long word) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int pairIndex; + long refWord; + final long partialHashCode = + VectorHashKeyRef.extractPartialHashCode(hashCode); + while (true) { + pairIndex = 2 * slot; + refWord = slotMultiples[pairIndex]; + if (refWord == 0) { + + // Create. + slotMultiples[pairIndex] = keyStore.add(partialHashCode, keyBytes, keyStart, keyLength); + slotMultiples[pairIndex + 1] = word; + + currentIsAggregationNeeded = false; + + keyCount++; + return; + } + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + partialHashCode && + VectorHashKeyRef.equalKey( + refWord, keyBytes, keyStart, keyLength, writeBuffers, readPos)) { + if (KeyRef.getFlag(refWord)) { + + // Turn off NULL flag. + slotMultiples[pairIndex] = refWord & KeyRef.Flag.flagOffMask; + slotMultiples[pairIndex + 1] = word; + currentIsAggregationNeeded = false; + } else { + currentPairIndex = pairIndex; + currentAggregationWord = slotMultiples[pairIndex + 1]; + currentIsAggregationNeeded = true; + } + return; + } + // Some other key (collision) - keep probing. 
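+ // Triangular-number stepping: probes land 1, 3, 6, 10, ... slots past the home slot.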
+ probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + } + + public void replaceBytesKeyWord(long word) { + slotMultiples[currentPairIndex + 1] = word; + } + + private int iteratePairIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + protected boolean iterateIsNullWord; + protected long iterateWord; + + protected int initBytesKeyIterator() { + iteratePairIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + iterateIsNullWord = false; + iterateWord = 0; + return keyCount; + } + + // Read next key. + protected void readNext() { + while (true) { + final long refWord = slotMultiples[iteratePairIndex]; + if (refWord != 0) { + keyStore.getKey( + refWord, + keyByteSegmentRef, + keyReadPos); + if (KeyRef.getFlag(refWord)) { + iterateIsNullWord = true; + } else { + iterateIsNullWord = false; + iterateWord = slotMultiples[iteratePairIndex + 1]; + } + + iteratePairIndex += 2; + return; + } + iteratePairIndex += 2; + } + } + + public byte[] getKeyBytes() { + return keyByteSegmentRef.getBytes(); + } + + public int getKeyBytesOffset() { + return (int) keyByteSegmentRef.getOffset(); + } + + public int getKeyBytesLength() { + return keyByteSegmentRef.getLength(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountTable.java new file mode 100644 index 0000000..172f925 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/count/VectorGroupByHashLongKeyCountTable.java @@ -0,0 +1,301 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.count; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Single long key hash table optimized for: + * 1) COUNT(*) Native Vectorized GroupBy. 
+ * 2) COUNT(key-column) and COUNT(non-key-column) + * Native Vectorized GroupBy + */ +public abstract class VectorGroupByHashLongKeyCountTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeyCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashLongKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return LONG_NON_ZERO_COUNT_ENTRY_SIZE; + } + + protected static final int LONG_NON_ZERO_COUNT_ENTRY_SIZE = 2; + + public void findOrCreateLongKeyNonZeroCount(long key, long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int pairIndex = 0; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex + 1] == 0) { + isNewKey = true; + break; + } + if (key == slotMultiples[pairIndex]) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + slotMultiples[pairIndex] = key; + keyCount++; + slotMultiples[pairIndex + 1] = count; + } else { + slotMultiples[pairIndex + 1] += count; + } + } + + private int nonZeroCountPairIndex; + private long currentNonZeroCount; + + protected int initLongNonZeroCountKeyIterator() { + nonZeroCountPairIndex = 0; + currentNonZeroCount = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNextNonZeroCountKey() { + while (true) { + long count = slotMultiples[nonZeroCountPairIndex + 1]; + if (count > 0) { + currentNonZeroCount = count; + long key = slotMultiples[nonZeroCountPairIndex]; + nonZeroCountPairIndex += 2; + return key; + } + nonZeroCountPairIndex += 2; + } + } + + public long getLongNonZeroCount() { + return currentNonZeroCount; + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one long key non-zero count hash table to the + * output. 
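+ * Forwards the output batch downstream whenever it fills.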
+ */ + protected void outputLongNonZeroKeyAndCountPairs( + LongColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongNonZeroCountKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNextNonZeroCountKey(); + countVector[i] = getLongNonZeroCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } + + //------------------------------------------------------------------------------------------------ + + private static long LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK = 1L << 63; + + protected static int LONG_ZERO_COUNT_ENTRY_SIZE = 2; + + public void findOrCreateLongKeyZeroCount(long key, long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int pairIndex = 0; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex + 1] == 0) { + isNewKey = true; + break; + } + if (key == slotMultiples[pairIndex]) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + slotMultiples[pairIndex] = key; + keyCount++; + if (count == 0) { + slotMultiples[pairIndex + 1] = LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK; + } else { + slotMultiples[pairIndex + 1] = count; + } + } else if (count > 0) { + + // Only update count when we are leaving 0. + if (slotMultiples[pairIndex + 1] == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) { + slotMultiples[pairIndex + 1] = count; + } else { + slotMultiples[pairIndex + 1] += count; + } + } + } + + private int countKeyPairIndex; + private long currentCountKeyCount; + + protected int initLongZeroCountKeyIterator() { + countKeyPairIndex = 0; + currentCountKeyCount = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNextZeroCountKey() { + while (true) { + long count = slotMultiples[countKeyPairIndex + 1]; + if (count != 0) { + if (count == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) { + currentCountKeyCount = 0; + } else { + currentCountKeyCount = count; + } + long key = slotMultiples[countKeyPairIndex]; + countKeyPairIndex += 2; + return key; + } + countKeyPairIndex += 2; + } + } + + public long getCount() { + return currentCountKeyCount; + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one long key zero count hash table to the + * output. 
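+ * Like the non-zero-count variant above, forwards the output batch whenever it fills,
+ * decoding the zero-count sentinel back to an actual count of 0.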
+ */ + protected void outputLongZeroCountKeyAndCountPairs( + LongColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongZeroCountKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + keyIsNull[batchIndex] = false; + keyVector[batchIndex] = getNextZeroCountKey(); + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/duplicatereduction/VectorGroupByHashLongKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/duplicatereduction/VectorGroupByHashLongKeyDuplicateReductionTable.java new file mode 100644 index 0000000..d2c6255 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/duplicatereduction/VectorGroupByHashLongKeyDuplicateReductionTable.java @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.duplicatereduction; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * A single long key hash table optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashLongKeyDuplicateReductionTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. 
+  //---------------------------------------------------------------------------
+
+  protected boolean haveZeroKey;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashLongKeyDuplicateReductionTable() {
+    super();
+
+    keyColumnNum = -1;
+  }
+
+  public VectorGroupByHashLongKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveZeroKey = false;
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  public int getHashTableMultiple() {
+    return LONG_DUPLICATE_REDUCTION_ENTRY_SIZE;
+  }
+
+  protected static final int LONG_DUPLICATE_REDUCTION_ENTRY_SIZE = 1;
+
+  public void createOrIgnoreLongDuplicateReductionKey(long key, long hashCode)
+      throws HiveException, IOException {
+
+    int intHashCode = (int) hashCode;
+    int slot = (intHashCode & logicalHashBucketMask);
+    long probeSlot = slot;
+    int i = 0;
+    while (true) {
+      // A slot value of zero means empty; an actual zero key is tracked separately with
+      // the haveZeroKey flag.
+      if (slotMultiples[slot] == 0) {
+        break;
+      }
+      if (key == slotMultiples[slot]) {
+        // Found it! A duplicate has now been eliminated.
+        return;
+      }
+      // Some other key (collision) - keep probing.
+      probeSlot += (++i);
+      if (largestNumberOfSteps < i) {
+        largestNumberOfSteps = i;
+      }
+      slot = (int)(probeSlot & logicalHashBucketMask);
+    }
+
+    // Create first-time key.
+    slotMultiples[slot] = key;
+    keyCount++;
+  }
+
+  private int countKeyIndex;
+
+  protected int initLongDuplicateReductionKeyIterator() {
+    countKeyIndex = 0;
+    return keyCount;
+  }
+
+  // Find next key and return it.
+  protected long getNext() {
+    while (true) {
+      long key = slotMultiples[countKeyIndex++];
+      if (key != 0) {
+        return key;
+      }
+    }
+  }
+
+  protected void doOutputLongKeys(
+      LongColumnVector keyColumnVector) throws HiveException {
+
+    long[] keyVector = keyColumnVector.vector;
+
+    if (haveZeroKey) {
+
+      // Zero key to deal with.
+
+      // Is the outputBatch already full?
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      keyVector[outputBatch.size++] = 0;
+    }
+
+    // Use the iterator to race down the slot table array and pull the long key out of each
+    // slot entry and store in the output batch.  (This duplicate-reduction table stores no
+    // counts, so only keys are emitted.)
+ int keyCount = initLongDuplicateReductionKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNext(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/word/VectorGroupByHashLongKeyWordTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/word/VectorGroupByHashLongKeyWordTable.java new file mode 100644 index 0000000..2a1b0e5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/word/VectorGroupByHashLongKeyWordTable.java @@ -0,0 +1,313 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.word; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * UNDONE: <---------------- + * Native Vectorized GroupBy + */ +public abstract class VectorGroupByHashLongKeyWordTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + protected final int keyColumnNum; + + // Variables from most recent findLongKeyWord call: + protected boolean currentIsAggregationNeeded; + private int currentTripleIndex; + protected long currentAggregationWord; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+  //
+
+  public VectorGroupByHashLongKeyWordTable() {
+    super();
+
+    keyColumnNum = -1;
+  }
+
+  public VectorGroupByHashLongKeyWordTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  public int getHashTableMultiple() {
+    return LONG_WORD_ENTRY_SIZE;
+  }
+
+  protected static final int LONG_WORD_ENTRY_SIZE = 3;
+
+  public static final class FlagsWord {
+
+    public static final class IsExistsFlag {
+      public static final int bitLength = 1;
+      public static final long flagOnMask = 1L;
+    }
+
+    public static boolean getIsExistsFlag(long flagsWord) {
+      return (flagsWord & IsExistsFlag.flagOnMask) != 0;
+    }
+
+    public static final class IsNullFlag {
+      public static final int bitLength = 1;
+      public static final int bitShift = IsExistsFlag.bitLength;
+      public static final long flagOnMask = 1L << bitShift;
+    }
+
+    public static boolean getIsNullFlag(long flagsWord) {
+      return (flagsWord & IsNullFlag.flagOnMask) != 0;
+    }
+
+    public static final class IsOverflowFlag {
+      public static final int bitShift = IsNullFlag.bitShift + IsNullFlag.bitLength;
+      public static final long flagOnMask = 1L << bitShift;
+    }
+
+    public static boolean getIsOverflowFlag(long flagsWord) {
+      return (flagsWord & IsOverflowFlag.flagOnMask) != 0;
+    }
+
+    public static final long existsAndIsNullMask = IsExistsFlag.flagOnMask | IsNullFlag.flagOnMask;
+  }
+
+  public void createOrIgnoreLongKeyNullEntry(long key, long hashCode)
+      throws HiveException, IOException {
+
+    int intHashCode = (int) hashCode;
+    int slot = (intHashCode & logicalHashBucketMask);
+    long probeSlot = slot;
+    int i = 0;
+    int tripleIndex = 0;
+    while (true) {
+      tripleIndex = 3 * slot;
+      final long flagsWord = slotMultiples[tripleIndex + 1];
+      if (flagsWord == 0) {
+
+        // Create.
+        slotMultiples[tripleIndex + 1] = FlagsWord.existsAndIsNullMask;
+
+        slotMultiples[tripleIndex] = key;
+
+        keyCount++;
+        return;
+      }
+      if (key == slotMultiples[tripleIndex]) {
+
+        // Ignore.
+        return;
+      }
+      // Some other key (collision) - keep probing.
+      probeSlot += (++i);
+      if (largestNumberOfSteps < i) {
+        largestNumberOfSteps = i;
+      }
+      slot = (int)(probeSlot & logicalHashBucketMask);
+    }
+  }
+
+  public void findOrCreateLongKeyWord(long key, long hashCode, long word)
+      throws HiveException, IOException {
+
+    int intHashCode = (int) hashCode;
+    int slot = (intHashCode & logicalHashBucketMask);
+    long probeSlot = slot;
+    int i = 0;
+    int tripleIndex = 0;
+    while (true) {
+      tripleIndex = 3 * slot;
+      final long flagsWord = slotMultiples[tripleIndex + 1];
+      if (flagsWord == 0) {
+        slotMultiples[tripleIndex + 1] = FlagsWord.IsExistsFlag.flagOnMask;
+        slotMultiples[tripleIndex + 2] = word;
+
+        slotMultiples[tripleIndex] = key;
+        currentIsAggregationNeeded = false;
+
+        keyCount++;
+        return;
+      }
+      if (key == slotMultiples[tripleIndex]) {
+        if ((flagsWord & FlagsWord.IsNullFlag.flagOnMask) != 0) {
+
+          // Turn off NULL flag.
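+          // The first non-NULL word simply replaces the NULL marker, so no aggregation is
+          // needed for this row.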
+ slotMultiples[tripleIndex + 1] = FlagsWord.IsExistsFlag.flagOnMask; + slotMultiples[tripleIndex + 2] = word; + currentIsAggregationNeeded = false; + } else { + currentTripleIndex = tripleIndex; + currentAggregationWord = slotMultiples[tripleIndex + 2]; + currentIsAggregationNeeded = true; + } + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + } + + public void replaceLongKeyWord(long word) { + slotMultiples[currentTripleIndex + 2] = word; + } + + private int iterateTripleIndex; + private boolean iterateIsNullWord; + private long iterateWord; + + protected int initLongKeyWordIterator() { + iterateTripleIndex = 0; + iterateIsNullWord = false; + iterateWord = 0; + return keyCount; + } + + // Find next key and return it. + protected long getIterateNextLongKey() { + while (true) { + long flagsWord = slotMultiples[iterateTripleIndex + 1]; + if (flagsWord != 0) { + if ((flagsWord & FlagsWord.IsNullFlag.flagOnMask) != 0) { + iterateIsNullWord = true; + } else { + iterateIsNullWord = false; + iterateWord = slotMultiples[iterateTripleIndex + 2]; + } + long key = slotMultiples[iterateTripleIndex]; + iterateTripleIndex += 3; + return key; + } + iterateTripleIndex += 3; + } + } + + //------------------------------------------------------------------------------------------------ + + /** + * UNDONE: Aggregate NULLs. + * Flush all of the key and count pairs of the one long key zero count hash table to the + * output. + */ + protected void outputLongKeyAndAggregatePairs( + LongColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongKeyWordIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + keyIsNull[batchIndex] = false; + keyVector[batchIndex] = getIterateNextLongKey(); + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } + + protected void outputLongKeyAndAggregatePairs( + LongColumnVector keyColumnVector, + DoubleColumnVector aggregateColumnVector) throws HiveException { + boolean[] keyIsNull = keyColumnVector.isNull; + long[] keyVector = keyColumnVector.vector; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + double[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. 
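+    // NULL aggregation words (IsNullFlag set in the slot's flags word) are emitted as NULL
+    // aggregate values, clearing noNulls on the output column vector.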
+ int keyCount = initLongKeyWordIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + keyIsNull[batchIndex] = false; + keyVector[batchIndex] = getIterateNextLongKey(); + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/count/VectorGroupByHashMultiKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/count/VectorGroupByHashMultiKeyCountTable.java new file mode 100644 index 0000000..b3d0485 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/count/VectorGroupByHashMultiKeyCountTable.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.count; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.count.VectorGroupByHashBytesKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * An single serialized key hash table optimized for single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashMultiKeyCountTable + extends VectorGroupByHashBytesKeyCountTable { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByHashMultiKeyCountTable() { + super(); + } + + public VectorGroupByHashMultiKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + final int size = groupByKeyExpressions.length; + TypeInfo[] typeInfos = new TypeInfo[size]; + for (int i = 0; i < size; i++) { + VectorExpression keyExpr = groupByKeyExpressions[i]; + typeInfos[i] = keyExpr.getOutputTypeInfo(); + } + keyVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + typeInfos, + /* useExternalBuffer */ true)); + // Multi-key is starting at output column 0. + keyVectorDeserializeRow.init(0); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string hash table to the output. + */ + protected void doOutputMultiKeyAndCounts() throws HiveException { + + final int keySize = groupByKeyExpressions.length; + LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[keySize]; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/duplicatereduction/VectorGroupByHashMultiKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/duplicatereduction/VectorGroupByHashMultiKeyDuplicateReductionTable.java new file mode 100644 index 0000000..cb606fd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/duplicatereduction/VectorGroupByHashMultiKeyDuplicateReductionTable.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.duplicatereduction;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction.VectorGroupByHashBytesKeyDuplicateReductionTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * A multi-key (serialized keys) hash table optimized for duplicate reduction Native Vectorized
+ * GroupBy.
+ */
+public abstract class VectorGroupByHashMultiKeyDuplicateReductionTable
+    extends VectorGroupByHashBytesKeyDuplicateReductionTable {
+
+  private static final long serialVersionUID = 1L;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorDeserializeRow keyVectorDeserializeRow;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashMultiKeyDuplicateReductionTable() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    final int size = groupByKeyExpressions.length;
+    TypeInfo[] typeInfos = new TypeInfo[size];
+    for (int i = 0; i < size; i++) {
+      VectorExpression keyExpr = groupByKeyExpressions[i];
+      typeInfos[i] = keyExpr.getOutputTypeInfo();
+    }
+    keyVectorDeserializeRow =
+        new VectorDeserializeRow(
+            new BinarySortableDeserializeRead(
+                typeInfos,
+                /* useExternalBuffer */ true));
+    // Multi-key is starting at output column 0.
+    keyVectorDeserializeRow.init(0);
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the keys of the multi-key duplicate reduction hash table to the output.
+   */
+  protected void doOutputMultiKeys() throws HiveException {
+
+    // Use the iterator to race down the slot table array and get the serialized key out of
+    // each slot entry and store in the output batch.
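+    // Each serialized key is deserialized directly into the output batch columns;
+    // string-family values can be set by reference because the hash table bytes are immutable.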
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/count/VectorGroupByHashSingleKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/count/VectorGroupByHashSingleKeyCountTable.java new file mode 100644 index 0000000..6b07527 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/count/VectorGroupByHashSingleKeyCountTable.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey.count; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.count.VectorGroupByHashBytesKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * An single serialized key hash table optimized for single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashSingleKeyCountTable + extends VectorGroupByHashBytesKeyCountTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashSingleKeyCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashSingleKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + keyVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + typeInfos, + /* useExternalBuffer */ true)); + // Single key is output column 0. + keyVectorDeserializeRow.init(new int[] { 0 }); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string hash table to the output. + */ + protected void doOutputSingleKeyAndCountPairs( + ColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/duplicatereduction/VectorGroupByHashSingleKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/duplicatereduction/VectorGroupByHashSingleKeyDuplicateReductionTable.java new file mode 100644 index 0000000..af4fb59 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/duplicatereduction/VectorGroupByHashSingleKeyDuplicateReductionTable.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey.duplicatereduction;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction.VectorGroupByHashBytesKeyDuplicateReductionTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * A single serialized key hash table optimized for duplicate reduction Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashSingleKeyDuplicateReductionTable
+    extends VectorGroupByHashBytesKeyDuplicateReductionTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int keyColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorDeserializeRow keyVectorDeserializeRow;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashSingleKeyDuplicateReductionTable() {
+    super();
+
+    keyColumnNum = -1;
+  }
+
+  public VectorGroupByHashSingleKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() };
+    keyVectorDeserializeRow =
+        new VectorDeserializeRow(
+            new BinarySortableDeserializeRead(
+                typeInfos,
+                /* useExternalBuffer */ true));
+    // Single key is output column 0.
+    keyVectorDeserializeRow.init(new int[] { 0 });
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the keys of the single serialized key duplicate reduction hash table to the
+   * output.
+   */
+  protected void doOutputSerializeKeys(
+      ColumnVector keyColumnVector) throws HiveException {
+
+    boolean[] keyIsNull = keyColumnVector.isNull;
+
+    // Use the iterator to race down the slot table array and get the serialized key out of
+    // each slot entry and store in the output batch.
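+    // Keys stored in the hash table are non-NULL, so each output key entry is marked
+    // not null before the in-place deserialize.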
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/word/VectorGroupByHashSingleKeyWordTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/word/VectorGroupByHashSingleKeyWordTable.java new file mode 100644 index 0000000..a397c7d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/singlekey/word/VectorGroupByHashSingleKeyWordTable.java @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.singlekey.word; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.word.VectorGroupByHashBytesKeyWordTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * UNDONE: <---------------- + * An single serialized key hash table optimized for single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashSingleKeyWordTable + extends VectorGroupByHashBytesKeyWordTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + private transient VectorDeserializeRow keyVectorDeserializeRow; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashSingleKeyWordTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashSingleKeyWordTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + keyVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + typeInfos, + /* useExternalBuffer */ true)); + // Single key is output column 0. + keyVectorDeserializeRow.init(new int[] { 0 }); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and aggregate pairs of the one string hash table to the output. + */ + protected void doOutputSingleKeyAndAggregatePairs( + ColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } + + protected void doOutputSingleKeyAndAggregatePairs( + ColumnVector keyColumnVector, + DoubleColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + double[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. 
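+    // count is min(keys remaining, room left in the batch); e.g. with a DEFAULT_SIZE of 1024
+    // and outputBatch.size at 1000, at most 24 pairs are appended before the batch is forwarded.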
+ int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyVectorDeserializeRow.setBytes( + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. + keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + keyVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + if (iterateIsNullWord) { + aggregateIsNull[batchIndex] = true; + aggregateColumnVector.noNulls = false; + } else { + aggregateIsNull[batchIndex] = false; + aggregateVector[batchIndex] = iterateWord; + } + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/count/VectorGroupByHashStringKeyCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/count/VectorGroupByHashStringKeyCountTable.java new file mode 100644 index 0000000..2332f7d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/count/VectorGroupByHashStringKeyCountTable.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey.count; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.count.VectorGroupByHashBytesKeyCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * An single string key hash table optimized for single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashStringKeyCountTable + extends VectorGroupByHashBytesKeyCountTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashStringKeyCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashStringKeyCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + //------------------------------------------------------------------------------------------------ + + /** + * Flush all of the key and count pairs of the one string key zero count hash table to the + * output. + */ + protected void doOutputStringKeyAndCountPairs( + BytesColumnVector keyColumnVector, + LongColumnVector countColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] countIsNull = countColumnVector.isNull; + long[] countVector = countColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. + int keyCount = initBytesKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) { + readNext(); + keyIsNull[batchIndex] = false; + keyColumnVector.setRef( + batchIndex, + getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength()); + countIsNull[batchIndex] = false; + countVector[batchIndex] = getCount(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/duplicatereduction/VectorGroupByHashStringKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/duplicatereduction/VectorGroupByHashStringKeyDuplicateReductionTable.java new file mode 100644 index 0000000..5e0c38a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/duplicatereduction/VectorGroupByHashStringKeyDuplicateReductionTable.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey.duplicatereduction;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction.VectorGroupByHashBytesKeyDuplicateReductionTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+
+/*
+ * A single string key hash table optimized for duplicate reduction Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashStringKeyDuplicateReductionTable
+    extends VectorGroupByHashBytesKeyDuplicateReductionTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int keyColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashStringKeyDuplicateReductionTable() {
+    super();
+
+    keyColumnNum = -1;
+  }
+
+  public VectorGroupByHashStringKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the keys of the one string key duplicate reduction hash table to the
+   * output.
+   */
+  protected void doOutputStringKeys(
+      BytesColumnVector keyColumnVector) throws HiveException {
+
+    boolean[] keyIsNull = keyColumnVector.isNull;
+
+    // Use the iterator to race down the slot table array and get the bytes key out of
+    // each slot entry and store in the output batch.
+    int keyCount = initBytesKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) {
+        readNext();
+        keyIsNull[batchIndex] = false;
+        keyColumnVector.setRef(
+            batchIndex,
+            getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength());
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/word/VectorGroupByHashStringKeyWordTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/word/VectorGroupByHashStringKeyWordTable.java
new file mode 100644
index 0000000..33a5aa4
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/word/VectorGroupByHashStringKeyWordTable.java
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey.word; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.word.VectorGroupByHashBytesKeyWordTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * UNDONE: <---------------- + * An single string key hash table optimized for single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashStringKeyWordTable + extends VectorGroupByHashBytesKeyWordTable { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashStringKeyWordTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashStringKeyWordTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + //------------------------------------------------------------------------------------------------ + + /** + * UNDONE: Aggregate NULLs. + * Flush all of the key and count pairs of the one string key zero count hash table to the + * output. + */ + protected void doOutputStringKeyAndAggregatePairs( + BytesColumnVector keyColumnVector, + LongColumnVector aggregateColumnVector) throws HiveException { + + boolean[] keyIsNull = keyColumnVector.isNull; + boolean[] aggregateIsNull = aggregateColumnVector.isNull; + long[] aggregateVector = aggregateColumnVector.vector; + + // Use the iterator to race down the slot table array and get the bytes key and count out of + // each slot entry and store in the output batch. 
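+    // String keys are set into the output column by reference (setRef) rather than copied;
+    // the immutable hash table key bytes remain valid while the batch is forwarded.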
+    int keyCount = initBytesKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) {
+        readNext();
+        keyIsNull[batchIndex] = false;
+        keyColumnVector.setRef(
+            batchIndex,
+            getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength());
+        if (iterateIsNullWord) {
+          aggregateIsNull[batchIndex] = true;
+          aggregateColumnVector.noNulls = false;
+        } else {
+          aggregateIsNull[batchIndex] = false;
+          aggregateVector[batchIndex] = iterateWord;
+        }
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/hashkeyref/VectorHashKeyRef.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/hashkeyref/VectorHashKeyRef.java
new file mode 100644
index 0000000..26835a2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/hashkeyref/VectorHashKeyRef.java
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.hashkeyref;
+
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+// import com.google.common.base.Preconditions;
+
+public class VectorHashKeyRef {
+
+  public static boolean equalKey(long refWord, byte[] keyBytes, int keyStart, int keyLength,
+      WriteBuffers writeBuffers, WriteBuffers.Position readPos) {
+
+    // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0);
+
+    final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord);
+
+    writeBuffers.setReadPoint(absoluteOffset, readPos);
+
+    int actualKeyLength = KeyRef.getSmallKeyLength(refWord);
+    boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn);
+    if (!isKeyLengthSmall) {
+
+      // And, if current value is big we must read it.
+      actualKeyLength = writeBuffers.readVInt(readPos);
+    }
+
+    if (actualKeyLength != keyLength) {
+      return false;
+    }
+
+    // Our reading was positioned to the key.
+ if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { + return false; + } + + return true; + } + + public static int calculateHashCode(long refWord, WriteBuffers writeBuffers, + WriteBuffers.Position readPos) { + + // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); + + final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + + int actualKeyLength = KeyRef.getSmallKeyLength(refWord); + boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); + final long keyAbsoluteOffset; + if (!isKeyLengthSmall) { + + // Position after next relative offset (fixed length) to the key. + writeBuffers.setReadPoint(absoluteOffset, readPos); + + // And, if current value is big we must read it. + actualKeyLength = writeBuffers.readVInt(readPos); + keyAbsoluteOffset = absoluteOffset + actualKeyLength; + } else { + keyAbsoluteOffset = absoluteOffset; + } + + return writeBuffers.unsafeHashCode(keyAbsoluteOffset, actualKeyLength); + } + + public static final class KeyRef { + + // Lowest field. + public static final class PartialHashCode { + public static final int bitLength = 15; + public static final long allBitsOn = (1L << bitLength) - 1; + public static final long bitMask = allBitsOn; + + // Choose the high bits of the hash code KNOWING it was calculated as an int. + // + // We want the partial hash code to be different than the + // lower bits used for our hash table slot calculations. + public static final int intChooseBitShift = Integer.SIZE - bitLength; + } + + public static long getPartialHashCode(long refWord) { + // No shift needed since this is the lowest field. + return refWord & PartialHashCode.bitMask; + } + + // Can make the 64 bit reference non-zero if this is non-zero. E.g. for hash map and + // hash multi-set, the offset is to the first key which is always preceded by a 5 byte next + // relative value offset or 4 byte count. + public static final class AbsoluteOffset { + public static final int bitLength = 39; + public static final int byteLength = (bitLength + Byte.SIZE -1) / Byte.SIZE; + public static final long allBitsOn = (1L << bitLength) - 1; + public static final int bitShift = PartialHashCode.bitLength; + public static final long bitMask = ((long) allBitsOn) << bitShift; + + // Make it a power of 2. + public static final long maxSize = 1L << (bitLength - 2); + } + + public static long getAbsoluteOffset(long refWord) { + return (refWord & KeyRef.AbsoluteOffset.bitMask) >> AbsoluteOffset.bitShift; + } + + // When this field equals SmallKeyLength.allBitsOn, the key length is serialized at the + // beginning of the key. 
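+ // + // Overall reference word layout, low bits to high, given the field widths defined in + // this class: bits 0-14 PartialHashCode, bits 15-53 AbsoluteOffset, bits 54-60 + // SmallKeyLength, bit 61 Flag, bit 62 Flag2, and bit 63 IsInvalidFlag.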
+ public static final class SmallKeyLength { + public static final int bitLength = 7; + public static final int allBitsOn = (1 << bitLength) - 1; + public static final int threshold = allBitsOn; + public static final int bitShift = AbsoluteOffset.bitShift + AbsoluteOffset.bitLength; + public static final long bitMask = ((long) allBitsOn) << bitShift; + public static final long allBitsOnBitShifted = ((long) allBitsOn) << bitShift; + } + + public static int getSmallKeyLength(long refWord) { + return (int) ((refWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); + } + + public static final class Flag { + public static final int bitLength = 1; + public static final int bitShift = SmallKeyLength.bitShift + SmallKeyLength.bitLength; + public static final long flagOnMask = 1L << bitShift; + public static final long flagOffMask = ~flagOnMask; + } + + public static boolean getFlag(long refWord) { + return (refWord & Flag.flagOnMask) != 0; + } + + public static final class Flag2 { + public static final int bitShift = Flag.bitShift + Flag.bitLength; + public static final long flagOnMask = 1L << bitShift; + public static final long flagOffMask = ~flagOnMask; + } + + public static boolean getFlag2(long refWord) { + return (refWord & Flag2.flagOnMask) != 0; + } + + // This bit should not be on for valid value references. We use -1 for a no value marker. + public static final class IsInvalidFlag { + public static final int bitShift = 63; + public static final long flagOnMask = 1L << bitShift; + } + + public static boolean getIsInvalidFlag(long refWord) { + return (refWord & IsInvalidFlag.flagOnMask) != 0; + } + } + + + /** + * Extract partial hash code from the full hash code. + * + * Choose the high bits of the hash code KNOWING it was calculated as an int. + * + * We want the partial hash code to be different than the + * lower bits used for our hash table slot calculations. + * + * @param hashCode + * @return + */ + public static long extractPartialHashCode(long hashCode) { + return (hashCode >>> KeyRef.PartialHashCode.intChooseBitShift) & KeyRef.PartialHashCode.bitMask; + } + + /** + * Get partial hash code from the reference word. + * @param refWord + * @return + */ + public static long getPartialHashCodeFromRefWord(long refWord) { + return KeyRef.getPartialHashCode(refWord); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java new file mode 100644 index 0000000..2932d26 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.keystore; + +import org.apache.hadoop.hive.common.MemoryEstimate; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/** + * Optimized for sequential key lookup. + */ + +public class VectorKeyStore implements MemoryEstimate { + + protected WriteBuffers writeBuffers; + + public WriteBuffers getWriteBuffers() { + return writeBuffers; + } + + /** + * A store for bytes keys in memory. + * + * The memory is an "infinite" byte array as a WriteBuffers object. + * + * We give the client a 64-bit key reference to keep; it holds the offset of the key + * within the "infinite" byte array. The 64 bits include about half + * of the upper hash code to help during matching. + * + * We optimize the common case when the key length is short and store that information in the + * 64 bit reference. + * + * Cases: + * + * 1) One element when the key is small (its length is stored in the reference word): + * + * Key Reference + * | + * | absoluteOffset + * | + * | + * v + * + * KEY + * + * 2) One element, general: shows optional big key length. + * + * Key Reference + * | + * | absoluteOffset + * | + * | + * v + * [Big Key Length] + * optional KEY + */ + + /** + * The 64-bit long result is the key reference. + * @param partialHashCode + * @param keyBytes + * @param keyStart + * @param keyLength + */ + public long add(long partialHashCode, byte[] keyBytes, int keyStart, int keyLength) { + + // NOTE: In order to guarantee the reference word is non-zero, we write one pad byte in + // the constructor so absolute offset is non-zero. + final long absoluteOffset = writeBuffers.getWritePoint(); + + boolean isKeyLengthBig = (keyLength >= KeyRef.SmallKeyLength.threshold); + if (isKeyLengthBig) { + writeBuffers.writeVInt(keyLength); + } + writeBuffers.write(keyBytes, keyStart, keyLength); + + /* + * Form 64 bit key reference. + */ + long refWord = partialHashCode; + + refWord |= absoluteOffset << KeyRef.AbsoluteOffset.bitShift; + + if (isKeyLengthBig) { + refWord |= KeyRef.SmallKeyLength.allBitsOnBitShifted; + } else { + refWord |= ((long) keyLength) << KeyRef.SmallKeyLength.bitShift; + } + + // Preconditions.checkState(!KeyRef.getIsInvalidFlag(refWord)); + + return refWord; + } + + public VectorKeyStore(int writeBuffersSize) { + writeBuffers = new WriteBuffers(writeBuffersSize, KeyRef.AbsoluteOffset.maxSize); + + // NOTE: In order to guarantee the reference word is non-zero, we write one pad byte. + long offset = writeBuffers.getWritePoint(); + if (offset != 0) { + throw new RuntimeException("Expected to be at offset 0"); + } + writeBuffers.write(0); + } + + public void clear() { + writeBuffers.clear(); + } + + /* + * Get a key from the store given a key reference. + * The supplied readPos makes the read safe for shared-memory usage. + */ + public void getKey(long refWord, ByteSegmentRef keyByteSegmentRef, + WriteBuffers.Position readPos) { + + int storedKeyLength = KeyRef.getSmallKeyLength(refWord); + boolean isKeyLengthSmall = (storedKeyLength != KeyRef.SmallKeyLength.allBitsOn); + + long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + + writeBuffers.setReadPoint(absoluteOffset, readPos); + if (!isKeyLengthSmall) { + // Read the big key length we wrote with the key.
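+ // readVInt advances readPos past the length bytes, leaving it positioned at the key bytes.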
+ storedKeyLength = writeBuffers.readVInt(readPos); + } + writeBuffers.getByteSegmentRefToCurrent(keyByteSegmentRef, storedKeyLength, readPos); + } + + @Override + public long getEstimatedMemorySize() { + long size = 0; + size += writeBuffers == null ? 0 : writeBuffers.getEstimatedMemorySize(); + return size; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java deleted file mode 100644 index dbfe518..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java +++ /dev/null @@ -1,178 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; - -import org.apache.hadoop.hive.serde2.WriteBuffers; -// import com.google.common.base.Preconditions; - -public class VectorMapJoinFastBytesHashKeyRef { - - public static boolean equalKey(long refWord, byte[] keyBytes, int keyStart, int keyLength, - WriteBuffers writeBuffers, WriteBuffers.Position readPos) { - - // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); - - final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); - - writeBuffers.setReadPoint(absoluteOffset, readPos); - - int actualKeyLength = KeyRef.getSmallKeyLength(refWord); - boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); - if (!isKeyLengthSmall) { - - // And, if current value is big we must read it. - actualKeyLength = writeBuffers.readVInt(readPos); - } - - if (actualKeyLength != keyLength) { - return false; - } - - // Our reading was positioned to the key. - if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { - return false; - } - - return true; - } - - public static int calculateHashCode(long refWord, WriteBuffers writeBuffers, - WriteBuffers.Position readPos) { - - // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); - - final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); - - int actualKeyLength = KeyRef.getSmallKeyLength(refWord); - boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); - final long keyAbsoluteOffset; - if (!isKeyLengthSmall) { - - // Position after next relative offset (fixed length) to the key. - writeBuffers.setReadPoint(absoluteOffset, readPos); - - // And, if current value is big we must read it. 
- actualKeyLength = writeBuffers.readVInt(readPos); - keyAbsoluteOffset = absoluteOffset + actualKeyLength; - } else { - keyAbsoluteOffset = absoluteOffset; - } - - return writeBuffers.unsafeHashCode(keyAbsoluteOffset, actualKeyLength); - } - - public static final class KeyRef { - - // Lowest field. - public static final class PartialHashCode { - public static final int bitLength = 15; - public static final long allBitsOn = (1L << bitLength) - 1; - public static final long bitMask = allBitsOn; - - // Choose the high bits of the hash code KNOWING it was calculated as an int. - // - // We want the partial hash code to be different than the - // lower bits used for our hash table slot calculations. - public static final int intChooseBitShift = Integer.SIZE - bitLength; - } - - public static long getPartialHashCode(long refWord) { - // No shift needed since this is the lowest field. - return refWord & PartialHashCode.bitMask; - } - - // Can make the 64 bit reference non-zero if this is non-zero. E.g. for hash map and - // hash multi-set, the offset is to the first key which is always preceded by a 5 byte next - // relative value offset or 4 byte count. - public static final class AbsoluteOffset { - public static final int bitLength = 39; - public static final int byteLength = (bitLength + Byte.SIZE -1) / Byte.SIZE; - public static final long allBitsOn = (1L << bitLength) - 1; - public static final int bitShift = PartialHashCode.bitLength; - public static final long bitMask = ((long) allBitsOn) << bitShift; - - // Make it a power of 2. - public static final long maxSize = 1L << (bitLength - 2); - } - - public static long getAbsoluteOffset(long refWord) { - return (refWord & KeyRef.AbsoluteOffset.bitMask) >> AbsoluteOffset.bitShift; - } - - // When this field equals SmallKeyLength.allBitsOn, the key length is serialized at the - // beginning of the key. - public static final class SmallKeyLength { - public static final int bitLength = 8; - public static final int allBitsOn = (1 << bitLength) - 1; - public static final int threshold = allBitsOn; - public static final int bitShift = AbsoluteOffset.bitShift + AbsoluteOffset.bitLength; - public static final long bitMask = ((long) allBitsOn) << bitShift; - public static final long allBitsOnBitShifted = ((long) allBitsOn) << bitShift; - } - - public static int getSmallKeyLength(long refWord) { - return (int) ((refWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); - } - - public static final class IsSingleFlag { - public static final int bitShift = SmallKeyLength.bitShift + SmallKeyLength.bitLength; - public static final long flagOnMask = 1L << bitShift; - public static final long flagOffMask = ~flagOnMask; - } - - public static boolean getIsSingleFlag(long refWord) { - return (refWord & IsSingleFlag.flagOnMask) != 0; - } - - // This bit should not be on for valid value references. We use -1 for a no value marker. - public static final class IsInvalidFlag { - public static final int bitShift = 63; - public static final long flagOnMask = 1L << bitShift; - } - - public static boolean getIsInvalidFlag(long refWord) { - return (refWord & IsInvalidFlag.flagOnMask) != 0; - } - } - - - /** - * Extract partial hash code from the full hash code. - * - * Choose the high bits of the hash code KNOWING it was calculated as an int. - * - * We want the partial hash code to be different than the - * lower bits used for our hash table slot calculations. 
- * - * @param hashCode - * @return - */ - public static long extractPartialHashCode(long hashCode) { - return (hashCode >>> KeyRef.PartialHashCode.intChooseBitShift) & KeyRef.PartialHashCode.bitMask; - } - - /** - * Get partial hash code from the reference word. - * @param hashCode - * @return - */ - public static long getPartialHashCodeFromRefWord(long refWord) { - return KeyRef.getPartialHashCode(refWord); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index add8b9c..8f05033 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -19,7 +19,9 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; @@ -166,16 +174,16 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr boolean isNewKey; long refWord; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { refWord = slots[slot]; if (refWord == 0) { isNewKey = true; break; } - if (VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode && - VectorMapJoinFastBytesHashKeyRef.equalKey( + VectorHashKeyRef.equalKey( refWord, keyBytes, keyStart, keyLength, writeBuffers, unsafeReadPos)) { isNewKey = false; break; @@ -259,7 +267,7 @@ protected final int doHashMapMatch( long probeSlot = slot; int i = 0; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { final long refWord = slots[slot]; if (refWord == 0) { // Given that we do not delete, an empty slot means no match.
return -1; } else if ( - VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode) { // Finally, verify the key bytes match and remember read positions, etc in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMapStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMapStore.java index b71ebb6..c253988 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMapStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMapStore.java @@ -20,7 +20,7 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastBytesHashKeyRef.KeyRef; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; @@ -264,7 +264,7 @@ public boolean equalKey(byte[] keyBytes, int keyStart, int keyLength) { */ public void setMatch() { hasRows = true; - isSingleRow = KeyRef.getIsSingleFlag(refWord); + isSingleRow = KeyRef.getFlag(refWord); // We must set the position since equalKey does not leave us positioned correctly. hashMapStore.writeBuffers.setReadPoint( @@ -490,7 +490,7 @@ public long addFirst(long partialHashCode, byte[] keyBytes, int keyStart, int ke refWord |= ((long) keyLength) << KeyRef.SmallKeyLength.bitShift; } - refWord |= KeyRef.IsSingleFlag.flagOnMask; + refWord |= KeyRef.Flag.flagOnMask; // Preconditions.checkState(!KeyRef.getIsInvalidFlag(refWord)); @@ -517,10 +517,10 @@ public long addMore(long refWord, byte[] valueBytes, int valueStart, int valueLe // Where the new value record will be written. long nextAbsoluteValueOffset = writeBuffers.getWritePoint(); - if (KeyRef.getIsSingleFlag(refWord)) { + if (KeyRef.getFlag(refWord)) { // Mark reference as having more than 1 value. - refWord &= KeyRef.IsSingleFlag.flagOffMask; + refWord &= KeyRef.Flag.flagOffMask; // Write zeros to indicate no 3rd record. 
writeBuffers.write(RelativeOffset.zeroPadding); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 5ec90b4..f3e09e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -23,6 +23,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -63,16 +65,16 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr boolean isNewKey; long refWord; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { refWord = slots[slot]; if (refWord == 0) { isNewKey = true; break; } - if (VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode && - VectorMapJoinFastBytesHashKeyRef.equalKey( + VectorHashKeyRef.equalKey( refWord, keyBytes, keyStart, keyLength, writeBuffers, unsafeReadPos)) { isNewKey = false; break; @@ -132,7 +134,7 @@ protected final void doHashMultiSetContains( long probeSlot = slot; int i = 0; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { final long refWord = slots[slot]; if (refWord == 0) { @@ -140,7 +142,7 @@ protected final void doHashMultiSetContains( // Given that we do not delete, an empty slot means no match. return; } else if ( - VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode) { // Finally, verify the key bytes match and remember the set membership count in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSetStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSetStore.java index 20fa03a..a98901e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSetStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSetStore.java @@ -20,7 +20,7 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastBytesHashKeyRef.KeyRef; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.Position; @@ -163,7 +163,7 @@ public boolean equalKey(byte[] keyBytes, int keyStart, int keyLength) { * if necessary. 
*/ public void setContains() { - isSingleCount = KeyRef.getIsSingleFlag(refWord); + isSingleCount = KeyRef.getFlag(refWord); if (isSingleCount) { count = 1; @@ -227,7 +227,7 @@ public long addFirst(long partialHashCode, byte[] keyBytes, int keyStart, int ke refWord |= ((long) keyLength) << KeyRef.SmallKeyLength.bitShift; } - refWord |= KeyRef.IsSingleFlag.flagOnMask; + refWord |= KeyRef.Flag.flagOnMask; // Preconditions.checkState(!KeyRef.getIsInvalidFlag(refWord)); @@ -252,7 +252,7 @@ public long bumpCount(long refWord, WriteBuffers.Position unsafeReadPos) { countAbsoluteOffset, unsafeReadPos); // Mark reference as having more than 1 as the count. - refWord &= KeyRef.IsSingleFlag.flagOffMask; + refWord &= KeyRef.Flag.flagOffMask; // Save current write position. final long saveAbsoluteOffset = writeBuffers.getWritePoint(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 7c73aa6..ae5290e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -21,6 +21,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; import org.apache.hadoop.io.BytesWritable; @@ -58,16 +60,16 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr boolean isNewKey; long refWord; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { refWord = slots[slot]; if (refWord == 0) { isNewKey = true; break; } - if (VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + if (VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode && - VectorMapJoinFastBytesHashKeyRef.equalKey( + VectorHashKeyRef.equalKey( refWord, keyBytes, keyStart, keyLength, writeBuffers, unsafeReadPos)) { isNewKey = false; break; @@ -123,7 +125,7 @@ protected final void doHashSetContains( long probeSlot = slot; int i = 0; final long partialHashCode = - VectorMapJoinFastBytesHashKeyRef.extractPartialHashCode(hashCode); + VectorHashKeyRef.extractPartialHashCode(hashCode); while (true) { final long refWord = slots[slot]; if (refWord == 0) { @@ -131,7 +133,7 @@ protected final void doHashSetContains( // Given that we do not delete, an empty slot means no match. 
return; } else if ( - VectorMapJoinFastBytesHashKeyRef.getPartialHashCodeFromRefWord(refWord) == + VectorHashKeyRef.getPartialHashCodeFromRefWord(refWord) == partialHashCode) { // Finally, verify the key bytes match and implicitly remember the set existence in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSetStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSetStore.java index 1a78688..6370b21 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSetStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSetStore.java @@ -20,7 +20,8 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastBytesHashKeyRef.KeyRef; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef.KeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.Position; @@ -31,50 +32,7 @@ * Used by VectorMapJoinFastBytesHashSet to store the key and count for a hash set with * a bytes key. */ -public class VectorMapJoinFastBytesHashSetStore implements MemoryEstimate { - - private WriteBuffers writeBuffers; - - /** - * A store for a bytes key for a hash set in memory. - * - * The memory is a "infinite" byte array as a WriteBuffers object. - * - * We give the client (e.g. hash set logic) a 64-bit key and count reference to keep that - * has the offset within the "infinite" byte array of the key. The 64 bits includes about half - * of the upper hash code to help during matching. - * - * We optimize the common case when the key length is short and store that information in the - * 64 bit reference. - * - * Cases: - * - * 1) One element when key and is small (and stored in the reference word): - * - * Key and Value Reference - * | - * | absoluteOffset - * | - * | - * v - * - * KEY - * - * 2) One element, general: shows optional big key length. - * - * Key and Value Reference - * | - * | absoluteOffset - * | - * | - * v - * [Big Key Length] - * optional KEY - */ - - public WriteBuffers getWriteBuffers() { - return writeBuffers; - } +public class VectorMapJoinFastBytesHashSetStore extends VectorKeyStore { /** * A hash set result for the key. @@ -164,56 +122,7 @@ public String toString() { } } - /** - * Two 64-bit long result is the key and value reference. - * @param partialHashCode - * @param keyBytes - * @param keyStart - * @param keyLength - */ - public long add(long partialHashCode, byte[] keyBytes, int keyStart, int keyLength) { - - // We require the absolute offset to be non-zero so the 64 key and value reference is non-zero. - // So, we make it the offset after the relative offset and to the key. - final long absoluteOffset = writeBuffers.getWritePoint(); - - // NOTE: In order to guarantee the reference word is non-zero, later we will set the - // NOTE: single flag. - - boolean isKeyLengthBig = (keyLength >= KeyRef.SmallKeyLength.threshold); - if (isKeyLengthBig) { - writeBuffers.writeVInt(keyLength); - } - writeBuffers.write(keyBytes, keyStart, keyLength); - - /* - * Form 64 bit key and value reference. 
- */ - long refWord = partialHashCode; - - refWord |= absoluteOffset << KeyRef.AbsoluteOffset.bitShift; - - if (isKeyLengthBig) { - refWord |= KeyRef.SmallKeyLength.allBitsOnBitShifted; - } else { - refWord |= ((long) keyLength) << KeyRef.SmallKeyLength.bitShift; - } - - refWord |= KeyRef.IsSingleFlag.flagOnMask; - - // Preconditions.checkState(!KeyRef.getIsInvalidFlag(refWord)); - - return refWord; - } - public VectorMapJoinFastBytesHashSetStore(int writeBuffersSize) { - writeBuffers = new WriteBuffers(writeBuffersSize, KeyRef.AbsoluteOffset.maxSize); - } - - @Override - public long getEstimatedMemorySize() { - long size = 0; - size += writeBuffers == null ? 0 : writeBuffers.getEstimatedMemorySize(); - return size; + super(writeBuffersSize); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index 3d45a54..11214e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.exec.vector.hashkeyref.VectorHashKeyRef; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashTable; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.WriteBuffers; @@ -73,7 +75,7 @@ protected void expandAndRehash() { final long refWord = slots[slot]; if (refWord != 0) { final long hashCode = - VectorMapJoinFastBytesHashKeyRef.calculateHashCode( + VectorHashKeyRef.calculateHashCode( refWord, writeBuffers, unsafeReadPos); // Copy to new slot table. 
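To make the bit packing concrete, here is a small sketch (not part of the patch; KeyRef is the nested class from VectorHashKeyRef above, and hashCode, absoluteOffset, and keyLength are assumed inputs) that builds a reference word for a small key the same way VectorKeyStore.add does and checks that the accessors recover each field:

  // Sketch only: round-trip a small-key reference word through the KeyRef accessors.
  // Assumes absoluteOffset fits in 39 bits and keyLength is below
  // KeyRef.SmallKeyLength.threshold; hashCode is the full int hash code of the key.
  static long makeSmallKeyRefWord(long hashCode, long absoluteOffset, int keyLength) {
    long partialHashCode = VectorHashKeyRef.extractPartialHashCode(hashCode);  // low 15 bits
    long refWord = partialHashCode
        | (absoluteOffset << KeyRef.AbsoluteOffset.bitShift)                   // bits 15-53
        | (((long) keyLength) << KeyRef.SmallKeyLength.bitShift);              // bits 54-60
    assert KeyRef.getPartialHashCode(refWord) == partialHashCode;
    assert KeyRef.getAbsoluteOffset(refWord) == absoluteOffset;
    assert KeyRef.getSmallKeyLength(refWord) == keyLength;
    assert !KeyRef.getIsInvalidFlag(refWord);
    return refWord;
  }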
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 51b186c..7ee40fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -97,6 +97,35 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyLongMaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyLongMinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyLongSumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyLongMaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyLongMinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyLongSumColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSingleKeyCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyCountStarOperator; 
+import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyLongMaxColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyLongMinColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyLongSumColumnOperator; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -134,9 +163,15 @@ import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc; import org.apache.hadoop.hive.ql.plan.VectorPTFInfo; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate.CountAggregateKind; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.WordAggregate.WordAggregateKind; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.WordAggregate; import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc; @@ -312,6 +347,12 @@ private EnabledOverride vectorizationEnabledOverride; boolean isTestForcedVectorizationEnable; + boolean isVectorizationGroupByNativeEnabled; + private EnabledOverride vectorizationGroupByNativeEnabledOverride; + boolean isTestForcedVectorizationGroupByNativeEnable; + boolean weCanAttemptGroupByNativeVectorization; + int testGroupByMaxMemoryAvailable; + private boolean useVectorizedInputFileFormat; private boolean useVectorDeserialize; private boolean useRowDeserialize; @@ -2428,6 +2469,44 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE return physicalContext; } + // Native Vector GROUP BY. + isVectorizationGroupByNativeEnabled = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED); + + final String testVectorizationGroupByNativeOverrideString = + HiveConf.getVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE); + vectorizationGroupByNativeEnabledOverride = + EnabledOverride.nameMap.get(testVectorizationGroupByNativeOverrideString); + + isTestForcedVectorizationGroupByNativeEnable = false; + switch (vectorizationGroupByNativeEnabledOverride) { + case NONE: + weCanAttemptGroupByNativeVectorization = isVectorizationGroupByNativeEnabled; + break; + case DISABLE: + weCanAttemptGroupByNativeVectorization = false; + break; + case ENABLE: + weCanAttemptGroupByNativeVectorization = true; + isTestForcedVectorizationGroupByNativeEnable = !isVectorizationGroupByNativeEnabled; + + // Different parts of the code rely on this being set...
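+ // For example, a q-file test can set the override to "enable" to force the native GROUP BY + // path on even when HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED is off in the configuration.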
+ HiveConf.setBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED, true); + isVectorizationGroupByNativeEnabled = true; + break; + default: + throw new RuntimeException("Unexpected vectorization enabled override " + + vectorizationGroupByNativeEnabledOverride); + } + + testGroupByMaxMemoryAvailable = + HiveConf.getIntVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE); + + // Input Format control. useVectorizedInputFileFormat = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT); @@ -2649,7 +2728,10 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, setOperatorIssue("DISTINCT not supported"); return false; } - boolean ret = validateExprNodeDescNoComplex(desc.getKeys(), "Key"); + + // Allow Complex Type key expressions here because we may specialize. + // Later we will verify again. + boolean ret = validateExprNodeDesc(desc.getKeys(), "Key"); if (!ret) { return false; } @@ -3942,6 +4024,494 @@ private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoi return result; } + public static Operator<? extends OperatorDesc> specializeGroupByOperator( + Operator<? extends OperatorDesc> op, VectorizationContext vContext, + GroupByDesc desc, VectorGroupByDesc vectorDesc) + throws HiveException { + + VectorGroupByInfo vectorGroupByInfo = vectorDesc.getVectorGroupByInfo(); + + Operator<? extends OperatorDesc> vectorOp = null; + Class<? extends Operator<?>> opClass = null; + + VectorGroupByInfo.HashTableKeyType hashTableKeyType = + vectorGroupByInfo.getHashTableKeyType(); + + AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + switch (aggregationVariation) { + case HASH_DUPLICATE_REDUCTION: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeyDuplicateReductionOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyDuplicateReductionOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeyDuplicateReductionOperator.class; + break; + case SINGLE_KEY: + opClass = VectorGroupByHashSingleKeyDuplicateReductionOperator.class; + break; + case MULTI_KEY: + opClass = VectorGroupByHashMultiKeyDuplicateReductionOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + + case HASH_COUNT: + { + CountAggregate countAggregate = vectorGroupByInfo.getCountAggregation(); + CountAggregateKind countAggregateKind = countAggregate.getCountAggregationKind(); + + switch (countAggregateKind) { + case COUNT_STAR: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeyCountStarOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyCountStarOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeyCountStarOperator.class; + break; + case SINGLE_KEY: + opClass = VectorGroupByHashSingleKeyCountStarOperator.class; + break; + case MULTI_KEY: + opClass = VectorGroupByHashMultiKeyCountStarOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + case COUNT_KEY: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeyCountKeyOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyCountKeyOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeyCountKeyOperator.class; + break; + case SINGLE_KEY: + opClass = VectorGroupByHashSingleKeyCountKeyOperator.class; + break; + case
MULTI_KEY: + opClass = VectorGroupByHashMultiKeyCountKeyOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + case COUNT_COLUMN: + switch (hashTableKeyType) { + case LONG: + opClass = VectorGroupByHashLongKeyCountColumnOperator.class; + break; + case DECIMAL_64: + opClass = VectorGroupByHashDecimal64KeyCountColumnOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeyCountColumnOperator.class; + break; + case SINGLE_KEY: + opClass = VectorGroupByHashSingleKeyCountColumnOperator.class; + break; + case MULTI_KEY: + opClass = VectorGroupByHashMultiKeyCountColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + default: + throw new RuntimeException( + "Unexpected count aggregation kind " + countAggregateKind); + } + } + break; + + case HASH_WORD: + { + WordAggregate wordAggregate = vectorGroupByInfo.getWordAggregation(); + WordAggregateKind wordAggregateKind = wordAggregate.getWordAggregateKind(); + ColumnVector.Type inputColVectorType = wordAggregate.getInputColVectorType(); + switch (hashTableKeyType) { + case LONG: + switch (wordAggregateKind) { + case MAX: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashLongKeyLongMaxColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case MIN: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashLongKeyLongMinColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case SUM: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashLongKeyLongSumColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + default: + throw new RuntimeException( + "Unexpected word aggregation kind " + wordAggregateKind); + } + break; + // case DECIMAL_64: + // opClass = VectorGroupByHashDecimal64KeyCountColumnOperator.class; + // break; + case STRING: + switch (wordAggregateKind) { + case MAX: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashStringKeyLongMaxColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case MIN: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashStringKeyLongMinColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case SUM: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashStringKeyLongSumColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + default: + throw new RuntimeException( + "Unexpected word aggregation kind " + wordAggregateKind); + } + break; + case SINGLE_KEY: + switch (wordAggregateKind) { + case MAX: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashSingleKeyLongMaxColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case MIN: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashSingleKeyLongMinColumnOperator.class; + 
break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + case SUM: + switch (inputColVectorType) { + case LONG: + opClass = VectorGroupByHashSingleKeyLongSumColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected input column vector kind " + inputColVectorType); + } + break; + default: + throw new RuntimeException( + "Unexpected word aggregation kind " + wordAggregateKind); + } + break; + // case MULTI_KEY: + // opClass = VectorGroupByHashMultiKeyCountColumnOperator.class; + // break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + + + } + break; + + default: + throw new RuntimeException("Unexpected aggregation variation " + aggregationVariation); + } + + vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + vectorDesc.setIsNative(true); + + vectorOp = OperatorFactory.getVectorOperator( + opClass, op.getCompilationOpContext(), desc, vContext, vectorDesc); + LOG.info("Vectorizer vectorizeOperator group by class " + vectorOp.getClass().getSimpleName()); + + return vectorOp; + } + + private ImmutablePair<WordAggregate, String> checkSupportedWordAggregate( + VectorAggregationDesc vecAggrDesc, boolean isSingleColumnKey) { + + final String aggregationName = vecAggrDesc.getAggregationName(); + + if (!isSingleColumnKey) { + return new ImmutablePair<WordAggregate, String>(null, "Multi-key for " + aggregationName + " not implemented"); + } + + final WordAggregateKind wordAggregateKind; + switch (aggregationName) { + case "max": + wordAggregateKind = WordAggregateKind.MAX; + break; + case "min": + wordAggregateKind = WordAggregateKind.MIN; + break; + case "sum": + wordAggregateKind = WordAggregateKind.SUM; + break; + default: + return new ImmutablePair<WordAggregate, String>(null, aggregationName + " not implemented"); + } + + ColumnVector.Type inputColVectorType = vecAggrDesc.getInputColVectorType(); + if (inputColVectorType != ColumnVector.Type.LONG) { + return new ImmutablePair<WordAggregate, String>( + null, "input column vector type " + inputColVectorType + " for " + + aggregationName + " not implemented"); + } + + ColumnVector.Type outputColVectorType = vecAggrDesc.getOutputColVectorType(); + if (outputColVectorType != ColumnVector.Type.LONG && + outputColVectorType != ColumnVector.Type.BYTES) { + return new ImmutablePair<WordAggregate, String>( + null, "output column vector type " + outputColVectorType + " for " + + aggregationName + " not implemented"); + } + + return + new ImmutablePair<WordAggregate, String>( + new WordAggregate( + vecAggrDesc.getInputExpression().getOutputColumnNum(), + wordAggregateKind, + inputColVectorType), null); + } + + private boolean canSpecializeGroupBy(GroupByDesc desc, VectorGroupByDesc vectorDesc, + boolean isTezOrSpark, VectorizationContext vContext) throws HiveException { + + String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); + + VectorGroupByInfo vectorGroupByInfo = new VectorGroupByInfo(); + + List<String> vectorizationIssueList = new ArrayList<String>(); + + List<ExprNodeDesc> keyDescs = desc.getKeys(); + final boolean isEmptyKey = keyDescs.isEmpty(); + final int outputKeyLength = keyDescs.size(); + + GroupByDesc.Mode groupByMode = desc.getMode(); + ProcessingMode processingMode = vectorDesc.getProcessingMode(); + + VectorExpression[] vecKeyExprs = vectorDesc.getKeyExpressions(); + final int vecKeyExprSize = vecKeyExprs.length; + boolean isSingleColumnKey = (vecKeyExprSize == 1); + + VectorAggregationDesc[] vecAggrDescs = vectorDesc.getVecAggrDescs(); + final int vecAggrDescSize = (vecAggrDescs == null ? 0 : vecAggrDescs.length); + + List<AggregationDesc> aggrDescList = desc.getAggregators(); + + boolean isHash = (groupByMode == GroupByDesc.Mode.HASH); + final AggregationVariation aggregationVariation; + + CountAggregate countAggregate = null; + WordAggregate wordAggregate = null; + + if (!isHash) { + + // FUTURE: For now, we only do specialized implementations for HASH mode. + + aggregationVariation = AggregationVariation.NONE; + + } else if (vecAggrDescSize == 0) { + + // No aggregations just means the key is being grouped. We are getting rid of duplicate keys. + + aggregationVariation = AggregationVariation.HASH_DUPLICATE_REDUCTION; + + } else if (vecAggrDescSize == 1) { + + // Single COUNT, or single {MAX|MIN|SUM} word-sized aggregation on a supported data type? + + if (aggrDescList.get(0).getGenericUDAFName().equalsIgnoreCase("count")) { + + // Single COUNT aggregation specialization. Store key and count in hash table without a + // hash element. + + AggregationDesc countAggrDesc = aggrDescList.get(0); + List<ExprNodeDesc> countParamList = countAggrDesc.getParameters(); + final int countParamSize = countParamList.size(); + if (countParamSize == 0) { + + // COUNT(*) + + aggregationVariation = AggregationVariation.HASH_COUNT; + countAggregate = + new CountAggregate(CountAggregateKind.COUNT_STAR); + + } else if (countParamSize == 1) { + + aggregationVariation = AggregationVariation.HASH_COUNT; + + VectorAggregationDesc countVecAggrDesc = vecAggrDescs[0]; + + final int inputColumnNum = countVecAggrDesc.getInputExpression().getOutputColumnNum(); + + boolean isKey = false; + for (VectorExpression vecKeyExpr : vecKeyExprs) { + if (vecKeyExpr.getOutputColumnNum() == inputColumnNum) { + isKey = true; + break; + } + } + if (isKey) { + countAggregate = + new CountAggregate(CountAggregateKind.COUNT_KEY); + } else { + countAggregate = + new CountAggregate(CountAggregateKind.COUNT_COLUMN, inputColumnNum); + } + } else { + + aggregationVariation = AggregationVariation.NONE; + + vectorizationIssueList.add( + "Cannot specialize aggregation function " + countAggrDesc.getGenericUDAFName() + + " that has more than 1 input parameter"); + } + } else { + + // Single {MAX|MIN|SUM} on a supported word-size data type? + ImmutablePair<WordAggregate, String> pair = + checkSupportedWordAggregate(vecAggrDescs[0], isSingleColumnKey); + if (pair.left != null) { + + aggregationVariation = AggregationVariation.HASH_WORD; + wordAggregate = pair.left; + } else { + + aggregationVariation = AggregationVariation.NONE; + + vectorizationIssueList.add(pair.right); + } + } + } else { + + // FUTURE: Perhaps more aggregation variations will be supported... + aggregationVariation = AggregationVariation.NONE; + } + + // TEMPORARY: Restriction + + final VectorGroupByInfo.HashTableKeyType hashTableKeyType; + if (isSingleColumnKey) { + ColumnVector.Type colVectorType = vecKeyExprs[0].getOutputColumnVectorType(); + switch (colVectorType) { + case LONG: + + // Integer family, date, interval year month. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.LONG; + break; + + case DECIMAL_64: + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.DECIMAL_64; + break; + + case BYTES: + + // String family. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.STRING; + break; + + default: + + // All other data types get serialized.
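+ // For example, TIMESTAMP and INTERVAL_DAY_TIME column vectors fall through to the + // serialized single key case.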
+ hashTableKeyType = VectorGroupByInfo.HashTableKeyType.SINGLE_KEY; + break; + } + } else { + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.MULTI_KEY; + } + + vectorGroupByInfo.setIsVectorizationGroupByNativeEnabled( + weCanAttemptGroupByNativeVectorization); + vectorGroupByInfo.setEngine(engine); + + vectorGroupByInfo.setVectorizationIssueList(vectorizationIssueList); + + vectorGroupByInfo.setAggregationVariation(aggregationVariation); + + vectorGroupByInfo.setCountAggregate(countAggregate); + vectorGroupByInfo.setWordAggregate(wordAggregate); + + vectorGroupByInfo.setHashTableKeyType(hashTableKeyType); + + vectorGroupByInfo.setTestGroupByMaxMemoryAvailable(testGroupByMaxMemoryAvailable); + + // So EXPLAIN VECTORIZATION can show native conditions, etc. + vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + if (!weCanAttemptGroupByNativeVectorization || + !isTezOrSpark || + (aggregationVariation == AggregationVariation.NONE) || + groupByMode != GroupByDesc.Mode.HASH || + desc.isGroupingSetsPresent() || + vectorizationIssueList.size() > 0) { + return false; + } + + return true; + } + private Operator specializeReduceSinkOperator( Operator op, VectorizationContext vContext, ReduceSinkDesc desc, VectorReduceSinkDesc vectorDesc) throws HiveException { @@ -4534,7 +5104,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { } // No support for DECIMAL_64 input. We must convert. - inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression); + inputExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(inputExpression, vContext); inputColVectorType = ColumnVector.Type.DECIMAL; // Fall through... @@ -4550,7 +5121,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { // we have to make sure same decimal type should be used during bloom filter creation // and bloom filter probing if (aggregationName.equals("bloom_filter")) { - inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression); + inputExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(inputExpression, vContext); inputColVectorType = ColumnVector.Type.DECIMAL; } final VectorAggregationDesc vecAggrDesc = @@ -4562,7 +5134,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { } // No support for DECIMAL_64 input. We must convert. - inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression); + inputExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(inputExpression, vContext); inputColVectorType = ColumnVector.Type.DECIMAL; // Fall through... @@ -4600,16 +5173,30 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { Operator groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { - ImmutablePair,String> pair = + String issue = + doVectorizeGroupByOperatorPreparation( + groupByOp, vContext, vectorGroupByDesc); + Preconditions.checkState(issue == null); + return doVectorizeGroupByOperator( groupByOp, vContext, vectorGroupByDesc); - return pair.left; + } + + private static Operator doVectorizeGroupByOperator( + Operator groupByOp, VectorizationContext vContext, + VectorGroupByDesc vectorGroupByDesc) + throws HiveException { + Operator vectorOp = + OperatorFactory.getVectorOperator( + groupByOp.getCompilationOpContext(), (GroupByDesc) groupByOp.getConf(), + vContext, vectorGroupByDesc); + return vectorOp; } /* * NOTE: The VectorGroupByDesc has already been allocated and will be updated here. 
*/ - private static ImmutablePair<Operator<? extends OperatorDesc>,String> doVectorizeGroupByOperator( + public static String doVectorizeGroupByOperatorPreparation( Operator<? extends OperatorDesc> groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { @@ -4618,9 +5205,10 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { List<ExprNodeDesc> keysDesc = groupByDesc.getKeys(); - // For now, we don't support group by on DECIMAL_64 keys. + // Allow DECIMAL_64 key expressions in preparation because we may specialize. + // Later we will verify again. VectorExpression[] vecKeyExpressions = - vContext.getVectorExpressionsUpConvertDecimal64(keysDesc); + vContext.getVectorExpressions(keysDesc); ArrayList<AggregationDesc> aggrDesc = groupByDesc.getAggregators(); final int size = aggrDesc.size(); @@ -4631,7 +5219,7 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { ImmutablePair<VectorAggregationDesc, String> pair = getVectorAggregationDesc(aggDesc, vContext); if (pair.left == null) { - return new ImmutablePair<Operator<? extends OperatorDesc>, String>(null, pair.right); + return pair.right; } vecAggrDescs[i] = pair.left; @@ -4642,11 +5230,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { vectorGroupByDesc.setKeyExpressions(vecKeyExpressions); vectorGroupByDesc.setVecAggrDescs(vecAggrDescs); vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns); - Operator<? extends OperatorDesc> vectorOp = - OperatorFactory.getVectorOperator( - groupByOp.getCompilationOpContext(), groupByDesc, - vContext, vectorGroupByDesc); - return new ImmutablePair<Operator<? extends OperatorDesc>, String>(vectorOp, null); + + return null; // No issue. } public static Operator<? extends OperatorDesc> vectorizeSelectOperator( @@ -4723,7 +5308,8 @@ private static VectorExpression fixDecimalDataTypePhysicalVariations(final Vecto oldExpression = children[i]; // we found at least one children with mismatch if (oldExpression.getOutputDataTypePhysicalVariation() == DataTypePhysicalVariation.DECIMAL_64) { - newExpression = vContext.wrapWithDecimal64ToDecimalConversion(oldExpression); + newExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(oldExpression, vContext); children[i] = newExpression; inputArgsChanged = true; dataTypePhysicalVariations[i] = DataTypePhysicalVariation.NONE; @@ -4753,8 +5339,9 @@ private static VectorExpression fixDecimalDataTypePhysicalVariations(final Vecto arguments[arguments.length - 1] = parent.getOutputColumnNum(); } // re-instantiate the parent expression with new arguments - VectorExpression newParent = vContext.instantiateExpression(parent.getClass(), parent.getOutputTypeInfo(), - parent.getOutputDataTypePhysicalVariation(), arguments); + VectorExpression newParent = + VectorizationContext.instantiateExpression(parent.getClass(), parent.getOutputTypeInfo(), + parent.getOutputDataTypePhysicalVariation(), vContext, arguments); newParent.setOutputTypeInfo(parent.getOutputTypeInfo()); newParent.setOutputDataTypePhysicalVariation(parent.getOutputDataTypePhysicalVariation()); newParent.setInputTypeInfos(parent.getInputTypeInfos()); @@ -5298,23 +5885,51 @@ private static VectorPTFInfo createVectorPTFInfo(Operator - ImmutablePair<Operator<? extends OperatorDesc>,String> pair = - doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); - if (pair.left == null) { - setOperatorIssue(pair.right); + String issue = + doVectorizeGroupByOperatorPreparation(op, vContext, vectorGroupByDesc); + if (issue != null) { + setOperatorIssue(issue); throw new VectorizerCannotVectorizeException(); } - vectorOp = pair.left; - isNative = false; + + GroupByDesc groupByDesc = (GroupByDesc) op.getConf(); + boolean specialize = + canSpecializeGroupBy(groupByDesc,
vectorGroupByDesc, isTezOrSpark, vContext); + + if (!specialize) { + + // Re-validate -- this time do not allow Complex Type keys. + boolean isNoComplexTypeKey = + validateExprNodeDescNoComplex(groupByDesc.getKeys(), "Key"); + if (!isNoComplexTypeKey) { + throw new VectorizerCannotVectorizeException(); + } + + // Regular VectorGroupByOperator does not support DECIMAL_64 keys. + VectorizationContext.upConvertDecimal64( + vectorGroupByDesc.getKeyExpressions(), vContext); + + vectorOp = + doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); + isNative = false; + + } else { + + vectorOp = + specializeGroupByOperator(op, vContext, groupByDesc, vectorGroupByDesc); + isNative = true; + } if (vectorTaskColumnInfo != null) { VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions(); if (usesVectorUDFAdaptor(vecKeyExpressions)) { vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); - for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { - if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { - vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + if (vecAggrDescs != null) { + for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { + if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 31237c8..acd0c63 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -20,8 +20,10 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; @@ -31,7 +33,10 @@ import org.apache.hive.common.util.AnnotationUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; - +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.CountAggregate.CountAggregateKind; /** * GroupByDesc. @@ -324,26 +329,38 @@ public Object clone() { this.groupingSetPosition, this.isDistinct); } + // Use LinkedHashSet to give predictable display order. + private static final Set<String> vectorizableGroupByNativeEngines = + new LinkedHashSet<String>(Arrays.asList("tez", "spark")); + public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { private final GroupByDesc groupByDesc; private final VectorGroupByDesc vectorGroupByDesc; + private final VectorGroupByInfo vectorGroupByInfo; + + private VectorizationCondition[] nativeConditions; public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorGroupByDesc vectorGroupByDesc) { - // Native vectorization not supported.
- super(vectorGroupByDesc, false); + super(vectorGroupByDesc, vectorGroupByDesc.isNative()); this.groupByDesc = groupByDesc; this.vectorGroupByDesc = vectorGroupByDesc; + vectorGroupByInfo = vectorGroupByDesc.getVectorGroupByInfo(); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List<String> getKeysExpression() { return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions()); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List<String> getAggregators() { + if (isNative) { + return null; + } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); List<String> vecAggrList = new ArrayList<String>(vecAggrDescs.length); for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { @@ -352,17 +369,20 @@ public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, return vecAggrList; } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProcessingMode() { return vectorGroupByDesc.getProcessingMode().name(); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getGroupByMode() { return groupByDesc.getMode().name(); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List<String> getVectorOutputConditionsNotMet() { List<String> results = new ArrayList<String>(); @@ -379,13 +399,110 @@ public String getGroupByMode() { return results; } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProjectedOutputColumnNums() { return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns()); } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorGroupByInfo.getIsVectorizationGroupByNativeEnabled(); + + String engine = vectorGroupByInfo.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableGroupByNativeEngines; + boolean engineInSupported = vectorizableGroupByNativeEngines.contains(engine); + + final List<String> vectorizationIssueList = vectorGroupByInfo.getVectorizationIssueList(); + + List<VectorizationCondition> conditionList = new ArrayList<VectorizationCondition>(); + conditionList.add( + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED.varname)); + conditionList.add( + new
VectorizationCondition( + engineInSupported, + engineInSupportedCondName)); + AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + conditionList.add( + new VectorizationCondition( + (aggregationVariation == AggregationVariation.HASH_COUNT || + aggregationVariation == AggregationVariation.HASH_DUPLICATE_REDUCTION || + aggregationVariation == AggregationVariation.HASH_WORD), + "Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate")); + conditionList.add( + new VectorizationCondition( + (vectorGroupByDesc.getProcessingMode() == ProcessingMode.HASH), + "Group By Mode HASH")); + conditionList.add( + new VectorizationCondition( + !groupByDesc.isGroupingSetsPresent(), + "No Grouping Sets")); + if (vectorizationIssueList.size() != 0) { + conditionList.add( + new VectorizationCondition( + true, + "Has issues \"" + + vectorizationIssueList.toString() + "\"")); + } + + VectorizationCondition[] conditions = + conditionList.toArray(new VectorizationCondition[0]); + + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getNativeConditionsMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences. + if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) { + return null; + } + + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List<String> getNativeConditionsNotMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences.
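+    // Non-HASH processing modes return null here so EXPLAIN omits the native condition lists.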
+ if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) { + return null; + } + + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsNotMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "countAggregation", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getCountAggregation() { + if (!isNative) { + return null; + } + final CountAggregate countAggregate = vectorGroupByInfo.getCountAggregation(); + if (countAggregate == null) { + return null; + } + final CountAggregateKind countAggregateKind = + countAggregate.getCountAggregationKind(); + if (countAggregateKind == CountAggregateKind.NONE) { + return null; + } + return countAggregateKind.name(); + } } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public GroupByOperatorExplainVectorization getGroupByVectorization() { VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) getVectorDesc(); if (vectorGroupByDesc == null) { @@ -404,11 +521,14 @@ public static String getComplexTypeEnabledCondition( public static String getComplexTypeWithGroupByEnabledCondition( boolean isVectorizationComplexTypesEnabled, boolean isVectorizationGroupByComplexTypesEnabled) { - final boolean enabled = (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled); + final boolean enabled = + (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled); return "(" + - HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationComplexTypesEnabled + + HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " + + isVectorizationComplexTypesEnabled + " AND " + - HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationGroupByComplexTypesEnabled + + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " + + isVectorizationGroupByComplexTypesEnabled + ") IS " + enabled; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index caf0c67..b7e60f7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -67,8 +67,12 @@ private boolean isVectorizationComplexTypesEnabled; private boolean isVectorizationGroupByComplexTypesEnabled; + private boolean isNative; + private VectorGroupByInfo vectorGroupByInfo; + public VectorGroupByDesc() { - this.processingMode = ProcessingMode.NONE; + processingMode = ProcessingMode.NONE; + isNative = false; } public void setProcessingMode(ProcessingMode processingMode) { @@ -78,6 +82,14 @@ public ProcessingMode getProcessingMode() { return processingMode; } + public void setIsNative(boolean isNative) { + this.isNative = isNative; + } + + public boolean isNative() { + return isNative; + } + public void setKeyExpressions(VectorExpression[] keyExpressions) { this.keyExpressions = keyExpressions; } @@ -118,6 +130,14 @@ public boolean getIsVectorizationGroupByComplexTypesEnabled() { return isVectorizationGroupByComplexTypesEnabled; } + public void setVectorGroupByInfo(VectorGroupByInfo vectorGroupByInfo) { + this.vectorGroupByInfo = vectorGroupByInfo; + } + + public VectorGroupByInfo getVectorGroupByInfo() { + return vectorGroupByInfo; + } + /**
* Which ProcessingMode for VectorGroupByOperator? * diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java new file mode 100644 index 0000000..81ac6f3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; + +/** + * VectorGroupByInfo. + * + * A convenience data structure that has information needed to vectorize group by. + * + * It is created by the Vectorizer when it is determining whether it can specialize, so the + * information doesn't have to be recreated again and again by the VectorGroupByOperator's + * constructors and later during execution. + */ +public class VectorGroupByInfo { + + private static final long serialVersionUID = 1L; + + public enum HashTableKeyType { + NONE, + LONG, + DECIMAL_64, + STRING, + SINGLE_KEY, + MULTI_KEY + } + + //------------------------------------------------------------------------------------------------ + + public enum AggregationVariation { + NONE, + HASH_DUPLICATE_REDUCTION, + HASH_WORD, + HASH_COUNT + } + + public static class CountAggregate { + + public enum CountAggregateKind { + NONE, + COUNT_STAR, + COUNT_KEY, + COUNT_COLUMN + } + + private final CountAggregateKind countAggregateKind; + private final int countColumnNum; + + public CountAggregate(CountAggregateKind countAggregateKind) { + this.countAggregateKind = countAggregateKind; + countColumnNum = -1; + } + + public CountAggregate(CountAggregateKind countAggregateKind, + int countColumnNum) { + this.countAggregateKind = countAggregateKind; + this.countColumnNum = countColumnNum; + } + + public CountAggregateKind getCountAggregationKind() { + return countAggregateKind; + } + + public int getCountColumnNum() { + return countColumnNum; + } + } + + public static class WordAggregate { + + public enum WordAggregateKind { + NONE, + MAX, + MIN, + SUM + } + + private final int wordAggregateColumnNum; + private final WordAggregateKind wordAggregateKind; + private final ColumnVector.Type inputColVectorType; + + public WordAggregate(int wordAggregateColumnNum, WordAggregateKind wordAggregateKind, + ColumnVector.Type inputColVectorType) { + this.wordAggregateColumnNum = wordAggregateColumnNum; + this.wordAggregateKind = wordAggregateKind; + this.inputColVectorType = inputColVectorType; + } + + public int getWordAggregateColumnNum() { + return wordAggregateColumnNum; + } + + public WordAggregateKind getWordAggregateKind() { + return wordAggregateKind; + } + + public ColumnVector.Type getInputColVectorType() { + return inputColVectorType; + } +
} + + //--------------------------------------------------------------------------- + + private boolean isVectorizationGroupByNativeEnabled; + private String engine; + + private List<String> vectorizationIssueList; + + private AggregationVariation aggregationVariation; + + private CountAggregate countAggregate; + private WordAggregate wordAggregate; + + private HashTableKeyType hashTableKeyType; + + private int testGroupByMaxMemoryAvailable; + + public VectorGroupByInfo() { + isVectorizationGroupByNativeEnabled = false; + + vectorizationIssueList = null; + + hashTableKeyType = HashTableKeyType.NONE; + + testGroupByMaxMemoryAvailable = -1; + } + + public boolean getIsVectorizationGroupByNativeEnabled() { + return isVectorizationGroupByNativeEnabled; + } + + public void setIsVectorizationGroupByNativeEnabled(boolean isVectorizationGroupByNativeEnabled) { + this.isVectorizationGroupByNativeEnabled = isVectorizationGroupByNativeEnabled; + } + + public String getEngine() { + return engine; + } + + public void setEngine(String engine) { + this.engine = engine; + } + + public List<String> getVectorizationIssueList() { + return vectorizationIssueList; + } + + public void setVectorizationIssueList(List<String> vectorizationIssueList) { + this.vectorizationIssueList = vectorizationIssueList; + } + + public void setAggregationVariation(AggregationVariation aggregationVariation) { + this.aggregationVariation = aggregationVariation; + } + + public AggregationVariation getAggregationVariation() { + return aggregationVariation; + } + + public void setCountAggregate(CountAggregate countAggregate) { + this.countAggregate = countAggregate; + } + + public CountAggregate getCountAggregation() { + return countAggregate; + } + + public void setWordAggregate(WordAggregate wordAggregate) { + this.wordAggregate = wordAggregate; + } + + public WordAggregate getWordAggregation() { + return wordAggregate; + } + + public HashTableKeyType getHashTableKeyType() { + return hashTableKeyType; + } + + public void setHashTableKeyType(HashTableKeyType hashTableKeyType) { + this.hashTableKeyType = hashTableKeyType; + } + + public int getTestGroupByMaxMemoryAvailable() { + return testGroupByMaxMemoryAvailable; + } + + public void setTestGroupByMaxMemoryAvailable(int testGroupByMaxMemoryAvailable) { + this.testGroupByMaxMemoryAvailable = testGroupByMaxMemoryAvailable; + } +} diff --git ql/src/test/queries/clientpositive/vector_groupby_multikey.q ql/src/test/queries/clientpositive/vector_groupby_multikey.q new file mode 100644 index 0000000..c91b026 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_multikey.q @@ -0,0 +1,151 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.groupby.native.enabled=true; +-- We want to create selectedInUse batches with WHERE expressions. +SET hive.optimize.ppd=false; + +set hive.llap.io.enabled=true; +set hive.llap.io.encode.enabled=true; + +-- SORT_QUERY_RESULTS + + + +CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt; +CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt; + +-- Add a single NULL row that will come from ORC as isRepeated.
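+-- (Each single-row insert lands in its own small ORC file, so the reader can present it as an isRepeating batch.)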
+insert into groupby_multi_1a values (NULL, NULL); + +-- And, a single non-NULL key already in the table and one that isn't; rows that will also +-- come from ORC as isRepeated. +insert into groupby_multi_1a values (date '2207-09-16', -13); +insert into groupby_multi_1a values (date '2018-04-20', 18); + +CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt; +CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt; + +insert into groupby_multi_1a_nonull values (date '2111-10-04', -81); +insert into groupby_multi_1a_nonull values (date '2018-04-21', 19); + + + +-- *_multi_1a + +-- COUNT_KEY +-- explain vectorization operator +-- select key0, key1, count(key0, key1) from groupby_multi_1a group by key0, key1; +-- select key0, key1, count(key0, key1) from groupby_multi_1a group by key0, key1; +-- select key0, key1, count(key0, key1) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1; + +-- COUNT_STAR +explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1; +select key0, key1, count(*) from groupby_multi_1a group by key0, key1; +select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1; +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1; +select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1; + +-- *_multi_1a_nonull + +-- COUNT_KEY +-- select key0, key1, count(key0, key1) from groupby_multi_1a_nonull group by key0, key1; +-- select key0, key1, count(key0, key1) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1; + +-- COUNT_STAR +select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1; +select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1; +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1; +select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1; + + + +------------------------------------------------------------------------------------------ + +CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k; + +-- MULTI-KEY: STRING, BOOLEAN +-- explain vectorization operator +-- select s, bo, count(s, bo) from over10k group by s, bo order by s, bo limit 10; +-- select s, bo, count(s, bo) from over10k group by s, bo order by s, bo limit 10; + +explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10; +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10; + +explain vectorization operator +select s, bo, count(*) from over10k group
by s, bo order by s, bo limit 10; +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10; + +-- MULTI-KEY: TIMESTAMP, SMALLINT +-- explain vectorization operator +-- select ts, si, count(ts, si) from over10k group by ts, si order by ts, si limit 10; +-- select ts, si, count(ts, si) from over10k group by ts, si order by ts, si limit 10; + +explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10; +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10; + +explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10; +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10; + +-- MULTI-KEY: DECIMAL, BINARY +-- explain vectorization operator +-- select `dec`, bin, count(`dec`, bin) from over10k group by `dec`, bin order by `dec`, bin limit 10; +-- select `dec`, bin, count(`dec`, bin) from over10k group by `dec`, bin order by `dec`, bin limit 10; + +explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10; +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10; + +explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10; +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10; + + +set hive.test.vectorized.groupby.native.max.memory.available=1024; + +-- explain vectorization operator +-- select i, b, count(i, b) from over10k group by i, b order by i, b limit 10; +-- select i, b, count(i, b) from over10k group by i, b order by i, b limit 10; + +explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10; +select i, b, count(si) from over10k group by i, b order by i, b limit 10; + +explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10; +select i, b, count(*) from over10k group by i, b order by i, b limit 10; + +explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10; +select i, b from over10k group by i, b order by i, b limit 10; diff --git ql/src/test/queries/clientpositive/vector_groupby_singlekey.q ql/src/test/queries/clientpositive/vector_groupby_singlekey.q new file mode 100644 index 0000000..b68a4c6 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_singlekey.q @@ -0,0 +1,710 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.groupby.native.enabled=true; +-- We want to create selectedInUse batches with WHERE expressions. +SET hive.optimize.ppd=false; + +set hive.llap.io.enabled=true; +set hive.llap.io.encode.enabled=true; + +-- SORT_QUERY_RESULTS + + + +CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt; +CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt; + +-- Add a single NULL row that will come from ORC as isRepeated. +insert into groupby_long_1a values (NULL); + +-- And, a single non-NULL key already in the table and one that isn't; rows that will also +-- come from ORC as isRepeated.
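+-- (-5206670856103795573 is an existing key in groupby_long_1a; 800 is the new one.)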
+insert into groupby_long_1a values (-5206670856103795573); +insert into groupby_long_1a values (800); + +CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt; +CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt; + +insert into groupby_long_1a_nonull values (-6187919478609154811); +insert into groupby_long_1a_nonull values (1000); + + + +CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt; +CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt; + +insert into groupby_long_1b values (NULL); + +insert into groupby_long_1b values (32030); +insert into groupby_long_1b values (800); + +CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt; +CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt; + +insert into groupby_long_1b_nonull values (31713); +insert into groupby_long_1b_nonull values (34); + + + +CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt; +CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt; + +insert into groupby_long_1c values (NULL, NULL); +insert into groupby_long_1c values (NULL, 'TKTKGVGFW'); +insert into groupby_long_1c values (NULL, 'NEW'); + +CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt; +CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt; + +insert into groupby_long_1c_nonull values (1928928239, NULL); +insert into groupby_long_1c_nonull values (9999, 'NEW'); + + + +-- *_long_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1a group by key; +select key, count(key) from groupby_long_1a group by key; +select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1a group by key; +select key, count(*) from groupby_long_1a group by key; +select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1a group by key order by key; +select key from groupby_long_1a group by key order by key; +select key from groupby_long_1a where key != -8460550397108077433 group by key order by key; + +-- *_long_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1a_nonull group by key; +select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1a_nonull group by key; +select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from
groupby_long_1a_nonull group by key order by key; +select key from groupby_long_1a_nonull group by key order by key; +select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key; + +-- *_long_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1b group by key; +select key, count(key) from groupby_long_1b group by key; +select key, count(key) from groupby_long_1b where key != 32030 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1b group by key; +select key, count(*) from groupby_long_1b group by key; +select key, count(*) from groupby_long_1b where key != 32030 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1b group by key order by key; +select key from groupby_long_1b group by key order by key; +select key from groupby_long_1b where key != 32030 group by key order by key; + +-- *_long_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1b_nonull group by key; +select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1b_nonull group by key; +select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key; +select key from groupby_long_1b_nonull group by key order by key; +select key from groupby_long_1b_nonull where key != 32030 group by key order by key; + +-- *_long_1c + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1c group by key; +select key, count(key) from groupby_long_1c group by key; +select key, count(key) from groupby_long_1c where key != -1437463633 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1c group by key; +select key, count(*) from groupby_long_1c group by key; +select key, count(*) from groupby_long_1c where key != -1437463633 group by key; + +-- COUNT_COLUMN +explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key; +select key, count(b_string) from groupby_long_1c group by key; +select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1c group by key order by key; +select key from groupby_long_1c group by key order by key; +select key from groupby_long_1c where key != -1437463633 group by key order by key; + +-- *_long_1c_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1c_nonull group by key; +select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1c_nonull group by key; +select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key; + +-- COUNT_COLUMN +select key, count(b_string) from groupby_long_1c_nonull group by key; +select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key; +select key from groupby_long_1c_nonull group by key order by key; +select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key; + + +set hive.llap.io.enabled=false; +set
hive.llap.io.encode.enabled=false; + +CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a; + +-- Add a single NULL row. +insert into groupby_decimal64_1a values (NULL); + +-- And, a single non-NULL key already in the table and one that isn't (these are text +-- tables, so there is no ORC isRepeated effect here). +insert into groupby_decimal64_1a values (324.33); +insert into groupby_decimal64_1a values (800); + +CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull; + +insert into groupby_decimal64_1a_nonull values (-76.2); +insert into groupby_decimal64_1a_nonull values (100); + + +CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b; + +insert into groupby_decimal64_1b values (NULL, NULL); + +insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01); +insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800); + +CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull; + +insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02); +insert into groupby_decimal64_1b_nonull values ('1970-05-08 04:59:00.0',34); + + +-- *_decimal64_1a + +-- COUNT_KEY +select key, count(key) from groupby_decimal64_1a group by key; +select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_decimal64_1a group by key; +select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization detail +select key from groupby_decimal64_1a group by key order by key; +select key from groupby_decimal64_1a group by key order by key; +select key from groupby_decimal64_1a where key != -0.342 group by key order by key; + + +-- *_decimal64_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_decimal64_1a_nonull group by key; +select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_decimal64_1a_nonull group by key; +select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key; +select key from groupby_decimal64_1a_nonull group by key order by key; +select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key; + + +-- *_decimal64_1b + +-- COUNT_KEY +explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key; +select key, count(key) from groupby_decimal64_1b group by key; +select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key; + +-- COUNT_STAR +explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key; +select key, count(*) from groupby_decimal64_1b group by key; +select key, count(*) from
groupby_decimal64_1b where key != 11041.91 group by key; + +-- COUNT_COLUMN +explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key; +select key, count(c_timestamp) from groupby_decimal64_1b group by key; +select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization detail +select key from groupby_decimal64_1b group by key order by key; +select key from groupby_decimal64_1b group by key order by key; +select key from groupby_decimal64_1b where key != 11041.91 group by key order by key; + +-- *_decimal64_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_decimal64_1b_nonull group by key; +select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_decimal64_1b_nonull group by key; +select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key; + +-- COUNT_COLUMN +select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key; +select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key; +select key from groupby_decimal64_1b_nonull group by key order by key; +select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key; + +set hive.llap.io.enabled=true; +set hive.llap.io.encode.enabled=true; + + + +CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt; +CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt; + +-- Add a single NULL row that will come from ORC as isRepeated. +insert into groupby_string_1a values (NULL); + +-- And, a single non-NULL key already in the table and one that isn't; rows that will also +-- come from ORC as isRepeated. +insert into groupby_string_1a values ('QNCYBDW'); +insert into groupby_string_1a values ('NOT'); + +CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt; +CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt; + +insert into groupby_string_1a_nonull values ('PXLD'); +insert into groupby_string_1a_nonull values ('AA'); + +-- Use same data as 1a.
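+-- (The char(4) column truncates the longer 1a strings, so grouping is effectively on 4-character prefixes.)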
+CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt; +CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt; + +insert into groupby_string_1b values (NULL); + +insert into groupby_string_1b values ('QNCYBDW'); +insert into groupby_string_1b values ('NOT'); + +CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt; +CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt; + +insert into groupby_string_1b_nonull values ('PXLD'); +insert into groupby_string_1b_nonull values ('AA'); + +CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt; +CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt; + +insert into groupby_string_1c values (NULL, NULL, NULL); +insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c values (NULL, '2018-04-11', NULL); + +insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL); +insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL); + +insert into groupby_string_1c values ('BB', NULL, NULL); +insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c values ('DD', '2018-04-14', NULL); + +CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt; +CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt; + +insert into groupby_string_1c_nonull values ('SDA', NULL, NULL); +insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454'); +insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL); + +insert into groupby_string_1c_nonull values ('EEE', NULL, NULL); +insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888'); +insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL); + +-- *_string_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1a group by key; +select key, count(key) from groupby_string_1a group by key; +select key, count(key) from groupby_string_1a where key != 'PXLD' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1a group by key; +select key, count(*) from groupby_string_1a group by key; +select key, count(*) from groupby_string_1a where key != 'PXLD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1a group by key order by key; +select key from groupby_string_1a group by key order by key; +select key from groupby_string_1a where key != 'PXLD' group by key order by key; + +-- *_string_1a_nonull + +--
COUNT_KEY +select key, count(key) from groupby_string_1a_nonull group by key; +select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1a_nonull group by key; +select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key; +select key from groupby_string_1a_nonull group by key order by key; +select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key; + +-- *_string_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1b group by key; +select key, count(key) from groupby_string_1b group by key; +select key, count(key) from groupby_string_1b where key != 'MXGD' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1b group by key; +select key, count(*) from groupby_string_1b group by key; +select key, count(*) from groupby_string_1b where key != 'MXGD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1b group by key order by key; +select key from groupby_string_1b group by key order by key; +select key from groupby_string_1b where key != 'MXGD' group by key order by key; + +-- *_string_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_string_1b_nonull group by key; +select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1b_nonull group by key; +select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key; +select key from groupby_string_1b_nonull group by key order by key; +select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key; + +-- *_string_1c + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1c group by key; +select key, count(key) from groupby_string_1c group by key; +select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1c group by key; +select key, count(*) from groupby_string_1c group by key; +select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_date +explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key; +select key, count(s_date) from groupby_string_1c group by key; +select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_timestamp +explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key; +select key, count(s_timestamp) from groupby_string_1c group by key; +select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1c group by key order by key; +select key from groupby_string_1c group by key order by key; +select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key; + +-- *_string_1c_nonull + +-- COUNT_KEY +select key, count(key) from groupby_string_1c_nonull group by key; +select key, count(key) 
from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1c_nonull group by key; +select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_date +select key, count(s_date) from groupby_string_1c_nonull group by key; +select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_timestamp +select key, count(s_timestamp) from groupby_string_1c_nonull group by key; +select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key; +select key from groupby_string_1c_nonull group by key order by key; +select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key; + + + +CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt; +CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt; + +CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt; +CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt; + + +CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt; +CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt; + +CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt; +CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt; + + +-- *_serialize_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key; +select key, count(key) from groupby_serialize_1a group by key; +select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key; +select key, count(*) from groupby_serialize_1a group by key; +select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1a group by key order by key; +select key from groupby_serialize_1a group by key order by key; +select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key; + +-- *_serialize_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_serialize_1a_nonull group by key; +select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_serialize_1a_nonull group 
by key; +select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key; +select key from groupby_serialize_1a_nonull group by key order by key; +select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key; + +-- *_serialize_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key; +select key, count(key) from groupby_serialize_1b group by key; +select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key; +select key, count(*) from groupby_serialize_1b group by key; +select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_smallint +explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key; +select key, count(c_smallint) from groupby_serialize_1b group by key; +select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_string +explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key; +select key, count(c_string) from groupby_serialize_1b group by key; +select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1b group by key order by key; +select key from groupby_serialize_1b group by key order by key; +select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key; + +-- *_serialize_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_serialize_1b_nonull group by key; +select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_serialize_1b_nonull group by key; +select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_smallint +select key, count(c_smallint) from groupby_serialize_1b_nonull group by key; +select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_string +select key, count(c_string) from groupby_serialize_1b_nonull group by key; +select key, count(c_string) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key; +select key from groupby_serialize_1b_nonull group by key order by key; +select key from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key order by key; + +------------------------------------------------------------------------------------------ + +CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k; + +-- STRING +explain
vectorization operator +select s, count(s) from over10k group by s order by s limit 10; +select s, count(s) from over10k group by s order by s limit 10; + +explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10; +select s, count(ts) from over10k group by s order by s limit 10; + +explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10; +select s, count(*) from over10k group by s order by s limit 10; + +-- SERIALIZE TIMESTAMP +explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10; +select ts, count(ts) from over10k group by ts order by ts limit 10; + +explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10; +select ts, count(d) from over10k group by ts order by ts limit 10; + +explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10; +select ts, count(*) from over10k group by ts order by ts limit 10; + +-- SERIALIZE DECIMAL +explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10; + +explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10; + +explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10; + + +set hive.test.vectorized.groupby.native.max.memory.available=1024; + +explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10; +select i, count(i) from over10k group by i order by i limit 10; + +explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10; +select i, count(b) from over10k group by i order by i limit 10; + +explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10; +select i, count(*) from over10k group by i order by i limit 10; + +explain vectorization operator +select i from over10k group by i order by i limit 10; +select i from over10k group by i order by i limit 10; diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 04518b3..6937aaf 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1649,6 +1649,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -1744,6 +1746,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event 
Vectorization: @@ -1758,6 +1762,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2443,6 +2449,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -2538,6 +2546,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2552,6 +2562,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_partitioned.q.out ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index f078ecc..830985f 100644 --- ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1726,6 +1726,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:tinyint native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: tinyint) @@ -2107,6 +2109,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index 1e2b330..12954ef 100644 --- ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -75,6 +75,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS 
true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -257,6 +259,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/mergejoin.q.out ql/src/test/results/clientpositive/llap/mergejoin.q.out index b86d822..2accfb0 100644 --- ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -126,6 +126,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -2055,6 +2057,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -3123,6 +3127,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out index a804e3c..96c7ead 100644 --- ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out @@ -66,11 +66,11 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY @@ -142,11 +142,11 @@ STAGE PLANS: PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked 
pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -160,11 +160,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -178,11 +178,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -255,6 +255,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -340,11 +342,11 @@ STAGE PLANS: PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -371,20 +373,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 
str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -398,11 +400,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -416,11 +418,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -434,11 +436,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -465,20 +467,20 @@ POSTHOOK: Lineage: orc_struct_type.st2 SIMPLE [(orc_struct_type_staging)orc_stru PREHOOK: query: select count(*) from orc_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from orc_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -492,11 +494,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 
str2001 2 2002 str2002 3 2003 str2003 @@ -510,11 +512,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from orc_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -528,11 +530,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@orc_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from orc_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 diff --git ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out index baeb60e..708aa75 100644 --- ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out @@ -78,11 +78,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -154,11 +154,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -231,6 +231,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -343,11 +345,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### 
+#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2144 2145 2142 2143 2140 2141 @@ -376,11 +378,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -452,11 +454,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -529,6 +531,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -641,11 +645,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2146 2147 2144 2145 2142 2143 @@ -674,11 +678,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -750,11 +754,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: 
Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -827,6 +831,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -939,11 +945,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2148 2149 2146 2147 2144 2145 diff --git ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index b036cdd..83624c0 100644 --- ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -88,11 +88,11 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 @@ -167,12 +167,12 @@ PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -247,6 +247,8 
@@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:string native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: string) @@ -360,12 +362,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -396,22 +398,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -426,12 +428,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -462,22 +464,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: 
type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -492,12 +494,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 diff --git ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out index fec8093..3898dc0 100644 --- ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out @@ -66,11 +66,11 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY @@ -142,11 +142,11 @@ STAGE PLANS: PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### 
+#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -160,11 +160,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -178,11 +178,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -255,6 +255,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -340,11 +342,11 @@ STAGE PLANS: PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -371,20 +373,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 
{"f1":2003,"f3":"str2003"} 2003 str2003 @@ -398,11 +400,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -416,11 +418,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -434,11 +436,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -465,20 +467,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -492,11 +494,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -510,11 
+512,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -528,11 +530,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 diff --git ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out index 7e3998d..2f4598d 100644 --- ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out +++ ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out @@ -999,11 +999,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 7:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -1028,7 +1028,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1126,11 +1126,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 7:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -1155,7 +1155,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index a9971d4..f59b7c5 100644 --- 
ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -285,6 +287,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -424,6 +428,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index 396afd3..0617234 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -86,6 +86,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index eb4b262..b0f1739 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -276,10 +276,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 
a2a765b..ca7eec5 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -164,10 +164,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -189,7 +189,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -370,10 +370,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -395,7 +395,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -768,10 +768,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -793,7 +793,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1119,11 +1119,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 7:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: 
boolean) @@ -1148,7 +1148,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1257,11 +1257,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 8:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1286,7 +1286,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -1395,11 +1395,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1424,7 +1424,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1533,11 +1533,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1562,7 +1562,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index cabc2b7..4805aff 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -178,6 +178,8 @@ STAGE PLANS: className: VectorGroupByOperator 
groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -369,11 +371,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 10:binary - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bin (type: binary) @@ -398,7 +400,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index 060281a..a923d9e 100644 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -158,6 +158,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out index b58de03..7d317e9 100644 --- ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -119,6 +119,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) @@ -324,6 +326,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index 3776f3e..41340a6 100644 --- 
ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -73,11 +73,11 @@ STAGE PLANS: Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyLongSumColumnOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -102,7 +102,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -298,11 +298,11 @@ STAGE PLANS: Group By Operator aggregations: sum(_col1) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyLongSumColumnOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -327,7 +327,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index b6247ef..0afdc37 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -935,10 +935,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator + countAggreation: COUNT_STAR groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -960,7 +961,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1052,11 +1053,26 @@ STAGE PLANS: TableScan alias: orc_create_complex_n0 Statistics: Num rows: 13503 Data size: 29968544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:str:string, 1:mp:map<string,string>, 2:lst:array<string>, 3:strct:struct<a:string,b:string>, 4:val:string, 5:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: lst (type: array<string>), strct (type: struct<a:string,b:string>) outputColumnNames: lst, strct + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] Statistics: Num rows: 13503 Data size: 29968544 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByHashMultiKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 2:array<string>, col 3:struct<a:string,b:string> + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] keys: lst (type: array<string>), strct (type: struct<a:string,b:string>) mode: hash outputColumnNames: _col0, _col1 @@ -1065,15 +1081,30 @@ STAGE PLANS: key expressions: _col0 (type: array<string>), _col1 (type: struct<a:string,b:string>) sort order: ++ Map-reduce partition columns: _col0 (type: array<string>), _col1 (type: struct<a:string,b:string>) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] Statistics: Num rows: 13503 Data size: 29968544 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [2, 3] + dataColumns: str:string, mp:map<string,string>, lst:array<string>, strct:struct<a:string,b:string>, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1155,11 +1186,12 @@ STAGE PLANS: Group By Operator aggregations: count(val) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyCountColumnOperator + countAggreation: COUNT_COLUMN groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: str (type: string) @@ -1186,7 +1218,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1251,9 +1283,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex_n0 #### A masked pattern was
here #### str _c1 -line2 4501 -line3 4501 -line1 4501 +line3 0 + 0 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT strct.B, count(val) FROM orc_create_complex_n0 GROUP BY strct.B PREHOOK: type: QUERY @@ -1298,11 +1329,12 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyCountColumnOperator + countAggreation: COUNT_COLUMN groupByMode: HASH keyExpressions: col 6:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -1329,7 +1361,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1394,9 +1426,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex_n0 #### A masked pattern was here #### strct.b _c1 -six 4501 -two 4501 -four 4501 +two 0 + 0 +four 0 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT strct, mp, lst, str, count(val) FROM orc_create_complex_n0 GROUP BY strct, mp, lst, str PREHOOK: type: QUERY @@ -1425,12 +1457,28 @@ STAGE PLANS: TableScan alias: orc_create_complex_n0 Statistics: Num rows: 13503 Data size: 46492296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:str:string, 1:mp:map<string,string>, 2:lst:array<string>, 3:strct:struct<a:string,b:string>, 4:val:string, 5:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: str (type: string), mp (type: map<string,string>), lst (type: array<string>), strct (type: struct<a:string,b:string>), val (type: string) outputColumnNames: str, mp, lst, strct, val + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4] Statistics: Num rows: 13503 Data size: 46492296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(val) + Group By Vectorization: + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggreation: COUNT_COLUMN + groupByMode: HASH + keyExpressions: col 0:string, col 1:map<string,string>, col 2:array<string>, col 3:struct<a:string,b:string> + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: str (type: string), mp (type: map<string,string>), lst (type: array<string>), strct (type: struct<a:string,b:string>) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -1439,16 +1487,31 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: map<string,string>), _col2 (type: array<string>), _col3 (type: struct<a:string,b:string>) sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: map<string,string>), _col2 (type: array<string>), _col3 (type: struct<a:string,b:string>) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1, 2, 3] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true,
No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [4] Statistics: Num rows: 13503 Data size: 46492296 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type MAP not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: str:string, mp:map<string,string>, lst:array<string>, strct:struct<a:string,b:string>, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1490,6 +1553,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex_n0 #### A masked pattern was here #### strct mp lst str _c4 -{"a":"one","b":"two"} {"key11":"value11","key12":"value12","key13":"value13"} ["a","b","c"] line1 4501 -{"a":"three","b":"four"} {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] line2 4501 -{"a":"five","b":"six"} {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] line3 4501 +NULL NULL NULL NULL 0 +{"a":"three","b":"four"} {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] line2 0 +{"a":"five","b":"six"} {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] line3 0 diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index dd54bd5..07cad98 100644 --- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1265,10 +1265,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 16:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ws_order_number (type: int) @@ -1292,7 +1293,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1319,10 +1320,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true,
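
-- Annotation (not part of the patch): the vector_complex_all.q.out hunks above
-- remove the old "Vectorizing complex type LIST/MAP not supported" fallback, so
-- with the native path a GROUP BY keyed on array/map/struct columns vectorizes
-- end to end (allNative: true on the map side). Query shape taken from the q.out:
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization detail
select strct, mp, lst, str, count(val) from orc_create_complex_n0
group by strct, mp, lst, str;
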
Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out index 9bd7bc1..fee417a 100644 --- ql/src/test/results/clientpositive/llap/vector_data_types.q.out +++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out @@ -381,6 +381,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index ef94587..9e42d0f 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -88,6 +88,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -268,6 +270,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -482,6 +486,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -681,6 +687,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 5827587..62b539c 100644 --- 
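
-- Annotation (not part of the patch): in the vector_count_distinct.q.out hunks
-- above, the inner distinct stage carries no aggregators at all, so it maps onto
-- the duplicate-reduction flavor of the native operator, and the follow-up count
-- over the deduplicated keys goes native as well. Sketch; the column comes from
-- that q.out and the table name is assumed to be web_sales:
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization detail
select count(distinct ws_order_number) from web_sales;
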
ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -592,6 +592,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -1210,6 +1212,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index 859aeba..01cfa72 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -2308,6 +2308,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -3249,6 +3251,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3413,6 +3417,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3659,6 +3665,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3789,6 +3797,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
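
-- Annotation (not part of the patch): "word-size aggregate" in the condition
-- strings above appears to mean the lone MAX/MIN/SUM must produce a long-backed
-- value. SUM over an int/bigint column qualifies (see the LongSumColumn operator
-- in vector_coalesce_2 earlier), while the decimal aggregations in
-- vector_decimal_aggregate and vector_decimal_precision stay on native: false.
-- Contrast sketch, hypothetical columns on example_src:
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization detail
select ckey, sum(cbigint) from example_src group by ckey;   -- expected native: true
explain vectorization detail
select ckey, sum(cdecimal) from example_src group by ckey;  -- expected native: false
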
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for max not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3915,10 +3925,11 @@ STAGE PLANS: Group By Operator aggregations: count(key) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:decimal(20,10)) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggreation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3941,7 +3952,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6286,6 +6297,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -7227,6 +7240,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -7391,6 +7406,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -7637,6 +7654,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7767,6 +7786,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled 
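
-- Annotation (not part of the patch): the vector_decimal_udf.q.out hunks show
-- that even a global aggregation with no GROUP BY key rides the native path when
-- it is a single COUNT(column); the planner picks the multi-key count-column
-- operator with an empty key list. Sketch, table name assumed from that test:
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization detail
select count(key) from decimal_udf;
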
IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for max not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7893,10 +7914,11 @@ STAGE PLANS: Group By Operator aggregations: count(key) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:decimal(15,3)/DECIMAL_64) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggreation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7919,7 +7941,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out index 747b74a..f6b5184 100644 --- ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out @@ -140,10 +140,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 357388 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) @@ -167,7 +168,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out index a118b2e..f13bf30 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out @@ -146,6 +146,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for max not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git 
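
-- Annotation (not part of the patch): vector_distinct_2.q.out above turns a pure
-- key-only GROUP BY (no aggregates) into the native
-- VectorGroupByHashMultiKeyDuplicateReductionOperator, with the whole map side
-- reporting allNative: true. Sketch, hypothetical table example_src(t tinyint,
-- s string) modeled on that test's key columns:
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization detail
select t, s from example_src group by t, s;
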
ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 6eaf7ad..3f567a7 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -60,6 +60,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -197,6 +199,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -360,6 +364,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -620,6 +626,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -974,6 +982,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -1009,6 +1019,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, Has issues 
"[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index f018a61..ebcccbb 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -230,6 +232,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -388,6 +392,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -540,6 +546,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -692,6 +700,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: 
string), _col1 (type: string), 0L (type: bigint) @@ -851,6 +861,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out index ff300a0..8997544 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -75,6 +75,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -277,6 +279,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -489,6 +493,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -601,11 +607,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator + countAggreation: COUNT_STAR groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col2 (type: bigint) @@ -794,6 +801,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint 
native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -906,11 +915,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator + countAggreation: COUNT_STAR groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col2 (type: bigint) @@ -1095,6 +1105,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1421,6 +1433,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1740,6 +1754,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1906,6 +1922,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1978,11 +1996,12 @@ STAGE PLANS: Group By Operator aggregations: count() Group By 
Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyCountStarOperator + countAggreation: COUNT_STAR groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col2 (type: bigint) @@ -2123,6 +2142,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index c090051..be405f9 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -82,6 +82,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -258,6 +260,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out index eb5480d..fc75283 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -90,6 +90,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b 
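
-- Annotation (not part of the patch): in vector_groupby_grouping_id2.q.out the
-- first stage (with grouping sets) is non-native, yet the follow-up COUNT(*)
-- keyed on the bigint GROUPING__ID still picks
-- VectorGroupByHashLongKeyCountStarOperator, so mixed plans get a partial
-- benefit. Sketch of the two-level shape, hypothetical table and columns:
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization detail
select gid, count(*)
from (select GROUPING__ID as gid from example_src group by key, value with rollup) t
group by gid;
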
(type: string), 0L (type: bigint) @@ -252,6 +254,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -414,6 +418,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -576,6 +582,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -732,6 +740,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -883,10 +893,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -911,7 +922,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1022,13 +1033,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -1053,7 +1065,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index 195ea0c..e6e8249 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -71,13 +71,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -102,7 +103,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -252,13 +253,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -283,7 +285,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -661,6 +663,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string) diff --git 
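
-- Annotation (not part of the patch): the vector_groupby_grouping_sets1.q.out
-- hunk just above keys a single COUNT(*) on a double column and lands on
-- VectorGroupByHashSerializeKeySingleCountStarOperator, i.e. keys outside the
-- long/string/multi-key specializations appear to fall back to a serialized-key
-- variant while still staying native. Sketch, hypothetical double column:
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization detail
select cdouble, count(*) from example_src group by cdouble;
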
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out index 2350830..4323f89 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out @@ -83,6 +83,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -222,6 +224,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -387,6 +391,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out index 31ccb5e..5368ff8 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -333,6 +335,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -607,13 +611,14 @@ STAGE PLANS: Group By Operator aggregations: 
count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -638,7 +643,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out index 07c4eed..f18fb6b 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -72,10 +72,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -100,7 +101,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -145,6 +146,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -263,10 +266,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -291,7 +295,7 @@ STAGE PLANS: 
inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -336,6 +340,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -481,10 +487,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -509,7 +516,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -549,13 +556,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out index 4563bd6..7fece26 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -218,6 +220,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string,
ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out index a5a3758..15e6381 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -236,6 +238,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -403,6 +407,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -571,6 +577,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -776,6 +784,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By 
Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -936,6 +946,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1103,6 +1115,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1264,6 +1278,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1466,10 +1482,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1494,7 +1511,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1621,10 +1638,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true 
vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1649,7 +1667,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1778,10 +1796,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1806,7 +1825,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1930,6 +1949,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2095,6 +2116,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2260,6 +2283,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2420,6 +2445,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out index e7c235a..ce89c59 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -295,6 +297,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -505,6 +509,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -713,6 +719,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -911,10 +919,11 @@ STAGE PLANS: native: true Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -940,7 +949,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1101,13 
+1110,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -1133,7 +1143,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index abf1c89..57c9890 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -74,6 +74,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: category (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index 26795d2..4604372 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -134,6 +134,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -149,10 +151,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_multikey.q.out ql/src/test/results/clientpositive/llap/vector_groupby_multikey.q.out new file
mode 100644 index 0000000..65951cf --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_multikey.q.out @@ -0,0 +1,2465 @@ +PREHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a.key1 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 EXPRESSION [] +POSTHOOK: Lineage: groupby_multi_1a.key1 EXPRESSION [] +PREHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@groupby_multi_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: Lineage: groupby_multi_1a_nonull.key0 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a_nonull.key1 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: 
date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 
1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +NULL NULL 2 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +NULL NULL +PREHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 
+2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_multi_1a_nonull + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 
-39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: 
over10k + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string), ts (type: timestamp) + outputColumnNames: bo, s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: s (type: string), bo (type: boolean) + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 7:string, col 6:boolean + native: true + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + 
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string) + outputColumnNames: bo, s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: s (type: string), bo (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 7:string, col 6:boolean + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + 
className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Limit + 
Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), d (type: double), ts (type: timestamp) + outputColumnNames: si, d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: ts (type: timestamp), si (type: smallint) + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp, col 1:smallint + native: true + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink 
Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(d) from over10k group by ts, si 
order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), ts (type: timestamp) + outputColumnNames: si, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: ts (type: timestamp), si (type: smallint) + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp, col 1:smallint + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: 
query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: f (type: float), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: f, dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: dec (type: decimal(4,2)), bin (type: binary) + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64, col 10:binary + native: true + Group By Operator + aggregations: count(f) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) 
+ Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) 
+#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: dec (type: decimal(4,2)), bin (type: binary) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64, col 10:binary + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: si, i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: i (type: int), b (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: 
VectorTopNKeyOperator + keyExpressions: col 2:int, col 3:bigint + native: true + Group By Operator + aggregations: count(si) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: i (type: int), b (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int, col 3:bigint + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce 
Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 
+PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: i (type: int), b (type: bigint) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int, col 3:bigint + native: true + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 +65536 4294967307 +65536 4294967308 +65536 4294967312 +65536 4294967317 +65536 4294967320 +65536 4294967326 +65536 4294967334 +65536 4294967336 +65536 4294967338 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index dfc8828..9e0bcc9 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -277,10 +277,11 @@ STAGE PLANS: native: true Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -305,7 +306,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -476,10 +477,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -503,7 +505,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -773,6 +775,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_item_sk (type: int) @@ -839,6 +843,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: true (type: boolean) @@ -997,6 +1003,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index abf352d..8ae1137 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: 
col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -327,6 +329,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -673,6 +677,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -708,6 +714,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_singlekey.q.out ql/src/test/results/clientpositive/llap/vector_groupby_singlekey.q.out new file mode 100644 index 0000000..9474eb3 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_singlekey.q.out @@ -0,0 +1,11883 @@ +PREHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: CREATE TABLE 
groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SIMPLE [(groupby_long_1a_txt)groupby_long_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SIMPLE [(groupby_long_1a_nonull_txt)groupby_long_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: 
insert into groupby_long_1a_nonull values (-6187919478609154811) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a_nonull values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SIMPLE [(groupby_long_1b_txt)groupby_long_1b_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1b values (32030) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (32030) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: 
default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SIMPLE [(groupby_long_1b_nonull_txt)groupby_long_1b_nonull_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b_nonull values (31713) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (31713) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b_nonull values (34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_txt 
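
The blocks above build the bigint and smallint fixtures; the blocks below repeat the same pattern for an int key paired with a string column. A minimal HiveQL sketch of that fixture-and-verify pattern follows. The table name groupby_long_demo is hypothetical; the staging-CTAS flow, the data file path, the hive.vectorized.execution.groupby.native.enabled flag, and the explain vectorization operator check are all taken from the surrounding output.

-- Stage delimited text, convert to ORC, then add rows with separate INSERTs so
-- the ORC table spans several files; the explicit NULL key exercises null handling.
CREATE TABLE groupby_long_demo_txt(key int)
row format delimited fields terminated by ',';
LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_demo_txt;
CREATE TABLE groupby_long_demo STORED AS ORC AS SELECT * FROM groupby_long_demo_txt;
insert into groupby_long_demo values (NULL);
insert into groupby_long_demo values (800);

-- Verify the native path is taken: with the flag on, the map-side Group By
-- Operator in the plan should report one of the native classNames seen below
-- (e.g. VectorGroupByHashLongKeySingleCountKeyOperator) with native: true.
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization operator
select key, count(key) from groupby_long_demo group by key;

-- NULL semantics pinned down by the result blocks below: count(key) skips rows
-- with a NULL key (the NULL group reports 0), while count(*) counts them.
select key, count(key) from groupby_long_demo group by key;
select key, count(*) from groupby_long_demo group by key;
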
+PREHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c.key SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS 
ORC AS SELECT * FROM groupby_long_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: Lineage: groupby_long_1c_nonull.b_string SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c_nonull.key SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe 
for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 
+968819023021777205 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: 
VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +NULL +PREHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key 
+PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a_nonull + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +1569543799237464101 +3313583664488247651 +968819023021777205 +PREHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +PREHOOK: type: QUERY 
+PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +3313583664488247651 +968819023021777205 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + 
vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + 
Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +NULL +PREHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b_nonull + Statistics: Num rows: 14 Data 
size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution 
mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: 
HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY 
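
Reviewer aside, before the count(b_string) plan replays below: the groupby_long_1c results above pin down why this patch needs three single-COUNT operator flavors. count(*) counts every row in a group, count(key) skips rows whose key is NULL, and count(col) skips rows whose col is NULL — exactly the NULL 4 / NULL 0 / NULL 3 split shown above. A minimal sketch of the semantics, using a hypothetical table t_nulls that is not part of this patch:

-- t_nulls is illustrative only; any int-keyed table with NULLs behaves the same.
CREATE TABLE t_nulls (k int, v string);
INSERT INTO t_nulls VALUES (1, 'a'), (1, NULL), (NULL, 'b'), (NULL, NULL);

-- count(*): every row counts, so the NULL-key group reports 2.
-- The plans above map this form to VectorGroupByHashLongKeySingleCountStarOperator.
SELECT k, count(*) FROM t_nulls GROUP BY k;

-- count(k): rows with a NULL key never count, so the NULL-key group reports 0.
-- Maps to VectorGroupByHashLongKeySingleCountKeyOperator.
SELECT k, count(k) FROM t_nulls GROUP BY k;

-- count(v): only rows with non-NULL v count, so each group reports 1 here.
-- Maps to VectorGroupByHashLongKeySingleCountColumnOperator, as in the plan below.
SELECT k, count(v) FROM t_nulls GROUP BY k;

The golden output now resumes with the count(b_string) plan.
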
+POSTHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: key, b_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b_string) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +9999 1 +NULL 3 +PREHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +9999 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: 
false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +NULL +PREHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by 
key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c_nonull + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS 
true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key 
from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +PREHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 +1928928239 +PREHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: insert into groupby_decimal64_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1a values (333.33) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (333.33) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@groupby_decimal64_1a_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp EXPRESSION [] +POSTHOOK: Lineage: groupby_decimal64_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +POSTHOOK: type: QUERY +POSTHOOK: Input: 
_dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY 
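
Reviewer aside: the groupby_decimal64_1a runs below, and the EXPLAIN VECTORIZATION DETAIL that follows them, exercise the Decimal64 key path. A quick interactive spot-check, assuming the same session conditions this test runs under (the plans below confirm both switches are true in this run, via enabledConditionsMet and nativeConditionsMet):

-- Shown explicitly only to make the plan preconditions visible.
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.groupby.native.enabled=true;

EXPLAIN VECTORIZATION DETAIL
SELECT key FROM groupby_decimal64_1a GROUP BY key ORDER BY key;

-- In Map 1, expect the key annotated as decimal(6,3)/DECIMAL_64 and the operator
-- chosen as VectorGroupByHashDecimal64KeyDuplicateReductionOperator (no aggregation,
-- so the duplicate-reduction flavor is used rather than a single-COUNT flavor).

The decimal64 result blocks and the detailed plan resume below.
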
+PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:decimal(6,3)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(6,3) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 +-87.200 +0.000 +23.220 
+324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +NULL +PREHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + 
TableScan + alias: groupby_decimal64_1a_nonull + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:decimal(6,3)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(6,3) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: 
key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashDecimal64KeySingleCountKeyOperator + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_KEY + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:decimal(8,2), VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, count(key) from groupby_decimal64_1b group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(key) from groupby_decimal64_1b group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 1
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.40 1
+2755.90 1
+32030.01 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+800.00 1
+8925.82 1
+9559.53 1
+NULL 0
+PREHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+10402.00 1
+13831.90 1
+15464.67 1
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.40 1
+2755.90 1
+32030.01 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+800.00 1
+8925.82 1
+9559.53 1
+PREHOOK: query: explain vectorization detail
+select key, count(*) from groupby_decimal64_1b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key, count(*) from groupby_decimal64_1b group by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: groupby_decimal64_1b
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct]
+                  Select Operator
+                    expressions: key (type: decimal(8,2))
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [1]
+                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      Group By Vectorization:
+                          className: VectorGroupByHashDecimal64KeySingleCountStarOperator
+                          groupByMode: HASH
+                          keyExpressions: col 1:decimal(8,2)/DECIMAL_64
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
+                          singleCountAggreation: COUNT_STAR
+                      keys: key (type: decimal(8,2))
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: decimal(8,2))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: decimal(8,2))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [1]
+                        Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [1]
+                    dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:decimal(8,2), VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:decimal(8,2)
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
+                keys: KEY._col0 (type: decimal(8,2))
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, count(*) from groupby_decimal64_1b group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(*) from groupby_decimal64_1b group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 1
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.40 1
+2755.90 1
+32030.01 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+800.00 1
+8925.82 1
+9559.53 1
+NULL 2
+PREHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 1
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.40 1
+2755.90 1
+32030.01 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+800.00 1
+8925.82 1
+9559.53 1
+PREHOOK: query: explain vectorization detail
+select key, count(c_timestamp) from groupby_decimal64_1b group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key, count(c_timestamp) from groupby_decimal64_1b group by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: groupby_decimal64_1b
+                  Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct]
+                  Select Operator
+                    expressions: c_timestamp (type: timestamp), key (type: decimal(8,2))
+                    outputColumnNames: c_timestamp, key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
+                    Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(c_timestamp)
+                      Group By Vectorization:
+                          className: VectorGroupByHashDecimal64KeySingleCountColumnOperator
+                          groupByMode: HASH
+                          keyExpressions: col 1:decimal(8,2)/DECIMAL_64
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
+                          singleCountAggreation: COUNT_COLUMN
+                      keys: key (type: decimal(8,2))
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: decimal(8,2))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: decimal(8,2))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [1]
+                        Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:decimal(8,2), VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:decimal(8,2)
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
+                keys: KEY._col0 (type: decimal(8,2))
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 0
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.40 1
+2755.90 1
+32030.01 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+800.00 1
+8925.82 1
+9559.53 1
+NULL 1
+PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+10402.00 1
+13831.90 1
+15464.67 0
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.40 1
+2755.90 1
+32030.01 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+800.00 1
+8925.82 1
+9559.53 1
+PREHOOK: query: explain vectorization detail
+select key from groupby_decimal64_1b group by key order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key from groupby_decimal64_1b group by key order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: groupby_decimal64_1b
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct]
+                  Select Operator
+                    expressions: key (type: decimal(8,2))
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [1]
+                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator
+                          groupByMode: HASH
+                          keyExpressions: col 1:decimal(8,2)/DECIMAL_64
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: key (type: decimal(8,2))
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: decimal(8,2))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: decimal(8,2))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [1]
+                    dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:decimal(8,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:decimal(8,2)
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: decimal(8,2))
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(8,2))
+                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [0]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumnNums: []
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:decimal(8,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: decimal(8,2))
+                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0]
+                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key from groupby_decimal64_1b group by key order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+POSTHOOK: query: select key from groupby_decimal64_1b group by key order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+10402.00
+11041.91
+13831.90
+15464.67
+16966.00
+16966.99
+1735.22
+2516.50
+2755.40
+2755.90
+32030.01
+3566.02
+645.07
+645.93
+7286.29
+800.00
+8925.82
+9559.53
+NULL
+PREHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+POSTHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b
+#### A masked pattern was here ####
+10402.00
+13831.90
+15464.67
+16966.00
+16966.99
+1735.22
+2516.50
+2755.40
+2755.90
+32030.01
+3566.02
+645.07
+645.93
+7286.29
+800.00
+8925.82
+9559.53
+PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 1
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.40 1
+2755.90 1
+31713.02 1
+34.00 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+8925.82 1
+9559.53 1
+PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 1
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.90 1
+31713.02 1
+34.00 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+8925.82 1
+9559.53 1
+PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 1
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.40 1
+2755.90 1
+31713.02 1
+34.00 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+8925.82 1
+9559.53 1
+PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 1
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.90 1
+31713.02 1
+34.00 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+8925.82 1
+9559.53 1
+PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 0
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.40 1
+2755.90 1
+31713.02 1
+34.00 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+8925.82 1
+9559.53 1
+PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+10402.00 1
+11041.91 1
+13831.90 1
+15464.67 0
+16966.00 1
+16966.99 1
+1735.22 1
+2516.50 1
+2755.90 1
+31713.02 1
+34.00 1
+3566.02 1
+645.07 1
+645.93 1
+7286.29 1
+8925.82 1
+9559.53 1
+PREHOOK: query: explain vectorization detail
+select key from groupby_decimal64_1b_nonull group by key order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key from groupby_decimal64_1b_nonull group by key order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: groupby_decimal64_1b_nonull
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct]
+                  Select Operator
+                    expressions: key (type: decimal(8,2))
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [1]
+                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator
+                          groupByMode: HASH
+                          keyExpressions: col 1:decimal(8,2)/DECIMAL_64
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: key (type: decimal(8,2))
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: decimal(8,2))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: decimal(8,2))
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [1]
+                    dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:decimal(8,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:decimal(8,2)
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: decimal(8,2))
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: decimal(8,2))
+                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [0]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumnNums: []
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:decimal(8,2)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: decimal(8,2))
+                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0]
+                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+10402.00
+11041.91
+13831.90
+15464.67
+16966.00
+16966.99
+1735.22
+2516.50
+2755.40
+2755.90
+31713.02
+34.00
+3566.02
+645.07
+645.93
+7286.29
+8925.82
+9559.53
+PREHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_decimal64_1b_nonull
+#### A masked pattern was here ####
+10402.00
+11041.91
+13831.90
+15464.67
+16966.00
+16966.99
+1735.22
+2516.50
+2755.90
+31713.02
+34.00
+3566.02
+645.07
+645.93
+7286.29
+8925.82
+9559.53
+PREHOOK: query: CREATE TABLE groupby_string_1a_txt(key string)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1a_txt
+POSTHOOK: query: CREATE TABLE groupby_string_1a_txt(key string)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@groupby_string_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@groupby_string_1a_txt
+PREHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@groupby_string_1a_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1a
+POSTHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@groupby_string_1a_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1a
+POSTHOOK: Lineage: groupby_string_1a.key SIMPLE [(groupby_string_1a_txt)groupby_string_1a_txt.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: insert into groupby_string_1a values (NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1a
+POSTHOOK: query: insert into groupby_string_1a values (NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1a
+POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION []
+PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1a
+POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1a
+POSTHOOK: Lineage: groupby_string_1a.key SCRIPT []
+PREHOOK: query: insert into groupby_string_1a values ('NOT')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1a
+POSTHOOK: query: insert into groupby_string_1a values ('NOT')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1a
+POSTHOOK: Lineage: groupby_string_1a.key SCRIPT []
+PREHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1a_nonull_txt
+POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1a_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@groupby_string_1a_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@groupby_string_1a_nonull_txt
+PREHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@groupby_string_1a_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1a_nonull
+POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@groupby_string_1a_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1a_nonull
+POSTHOOK: Lineage: groupby_string_1a_nonull.key SIMPLE [(groupby_string_1a_nonull_txt)groupby_string_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: insert into groupby_string_1a_nonull values ('PXLD')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1a_nonull
+POSTHOOK: query: insert into groupby_string_1a_nonull values ('PXLD')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1a_nonull
+POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT []
+PREHOOK: query: insert into groupby_string_1a_nonull values ('AA')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1a_nonull
+POSTHOOK: query: insert into groupby_string_1a_nonull values ('AA')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1a_nonull
+POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT []
+PREHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4))
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1b_txt
+POSTHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4))
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1b_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@groupby_string_1b_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@groupby_string_1b_txt
+PREHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@groupby_string_1b_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1b
+POSTHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@groupby_string_1b_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1b
+POSTHOOK: Lineage: groupby_string_1b.key SIMPLE [(groupby_string_1b_txt)groupby_string_1b_txt.FieldSchema(name:key, type:char(4), comment:null), ]
+PREHOOK: query: insert into groupby_string_1a values (NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1a
+POSTHOOK: query: insert into groupby_string_1a values (NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1a
+POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION []
+PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1a
+POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1a
+POSTHOOK: Lineage: groupby_string_1a.key SCRIPT []
+PREHOOK: query: insert into groupby_string_1a values ('NOT')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1a
+POSTHOOK: query: insert into groupby_string_1a values ('NOT')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1a
+POSTHOOK: Lineage: groupby_string_1a.key SCRIPT []
+PREHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4))
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1b_nonull_txt
+POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4))
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1b_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@groupby_string_1b_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@groupby_string_1b_nonull_txt
+PREHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@groupby_string_1b_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1b_nonull
+POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@groupby_string_1b_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1b_nonull
+POSTHOOK: Lineage: groupby_string_1b_nonull.key SIMPLE [(groupby_string_1b_nonull_txt)groupby_string_1b_nonull_txt.FieldSchema(name:key, type:char(4), comment:null), ]
+PREHOOK: query: insert into groupby_string_1b_nonull values ('PXLD')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1b_nonull
+POSTHOOK: query: insert into groupby_string_1b_nonull values ('PXLD')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1b_nonull
+POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT []
+PREHOOK: query: insert into groupby_string_1b_nonull values ('AA')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1b_nonull
+POSTHOOK: query: insert into groupby_string_1b_nonull values ('AA')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1b_nonull
+POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT []
+PREHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1c_txt
+POSTHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1c_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@groupby_string_1c_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@groupby_string_1c_txt
+PREHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@groupby_string_1c_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@groupby_string_1c_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: groupby_string_1c.s_date SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_date, type:date, comment:null), ]
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ]
+PREHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION []
+POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION []
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION []
+PREHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION []
+POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT []
+PREHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION []
+POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION []
+PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION []
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION []
+PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT []
+PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION []
+PREHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION []
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION []
+PREHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT []
+PREHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c
+POSTHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c
+POSTHOOK: Lineage: groupby_string_1c.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION []
+PREHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1c_nonull_txt
+POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1c_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@groupby_string_1c_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@groupby_string_1c_nonull_txt
+PREHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@groupby_string_1c_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@groupby_string_1c_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: Lineage: groupby_string_1c_nonull.key SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ]
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ]
+PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION []
+PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT []
+PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION []
+PREHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION []
+PREHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT []
+PREHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@groupby_string_1c_nonull
+POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT []
+POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION []
+PREHOOK: query: explain vectorization operator
+select key, count(key) from groupby_string_1a group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization operator
+select key, count(key) from groupby_string_1a group by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: groupby_string_1a
+                  Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(key)
+                      Group By Vectorization:
+                          className: VectorGroupByHashStringKeySingleCountKeyOperator
+                          groupByMode: HASH
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                          vectorProcessingMode: HASH
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, count(key) from groupby_string_1a group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_string_1a
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(key) from groupby_string_1a group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_string_1a
+#### A masked pattern was here ####
+FTWURVH 1
+MXGDMBD 1
+NOT 2
+NULL 0
+PXLD 3
+QNCYBDW 3
+UA 1
+WXHJ 5
+PREHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_string_1a
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_string_1a
+#### A masked pattern was here ####
+FTWURVH 1
+MXGDMBD 1
+NOT 2
+QNCYBDW 3
+UA 1
+WXHJ 5
+PREHOOK: query: explain vectorization operator
+select key, count(*) from groupby_string_1a group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization operator
+select key, count(*) from groupby_string_1a group by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: groupby_string_1a
+                  Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      Group By Vectorization:
+                          className: VectorGroupByHashStringKeySingleCountStarOperator
+                          groupByMode: HASH
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                          vectorProcessingMode: HASH
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, count(*) from groupby_string_1a group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_string_1a
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(*) from groupby_string_1a group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_string_1a
+#### A masked pattern was here ####
+FTWURVH 1
+MXGDMBD 1
+NOT 2
+NULL 3
+PXLD 3
+QNCYBDW 3
+UA 1
+WXHJ 5
+PREHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@groupby_string_1a
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@groupby_string_1a
+#### A masked pattern was here ####
+FTWURVH 1
+MXGDMBD 1
+NOT 2
+QNCYBDW 3
+UA 1
+WXHJ 5
+PREHOOK: query: explain vectorization operator
+select key from groupby_string_1a group by key order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization operator
+select key from groupby_string_1a group by key order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: groupby_string_1a
+                  Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                    Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByHashStringKeyDuplicateReductionOperator
+                          groupByMode: HASH
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                          vectorProcessingMode: HASH
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark]
IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +NULL +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +QNCYBDW +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from 
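The order-by plans above pick VectorGroupByHashStringKeyDuplicateReductionOperator: a GROUP BY with no aggregations only has to emit each distinct key once, so no aggregation buffers are needed at all. A hedged sketch of that idea, with a Java hash set standing in for the native hash table:

import java.util.*;

public class DuplicateReductionSketch {
    public static void main(String[] args) {
        List<String> keys = Arrays.asList("PXLD", "WXHJ", "PXLD", "UA", "WXHJ");
        // Duplicate reduction: keep one copy of each key, track nothing else.
        Set<String> distinct = new LinkedHashSet<>(keys); // first-seen order
        distinct.forEach(System.out::println);            // PXLD, WXHJ, UA
    }
}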
groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a_nonull + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: 
string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +MXGDMBD +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 0 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b where 
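Every native plan in this file prints the same five nativeConditionsMet entries. A sketch of that gate as a single predicate follows; the method and parameter names are hypothetical, only the conditions themselves come from the explain output:

public class NativeGroupByConditionsSketch {
    // Illustrative only: mirrors the conditions listed under nativeConditionsMet.
    static boolean canUseNativeGroupBy(boolean nativeEnabled, String engine,
            boolean singleCountOrDupReduction, String groupByMode,
            boolean hasGroupingSets) {
        return nativeEnabled                                    // groupby.native.enabled IS true
            && (engine.equals("tez") || engine.equals("spark")) // engine IN [tez, spark]
            && singleCountOrDupReduction                        // single COUNT or duplicate reduction
            && groupByMode.equals("HASH")                       // Group By Mode HASH
            && !hasGroupingSets;                                // no grouping sets
    }

    public static void main(String[] args) {
        System.out.println(canUseNativeGroupBy(true, "tez", true, "HASH", false));         // true
        System.out.println(canUseNativeGroupBy(true, "tez", true, "MERGEPARTIAL", false)); // false
    }
}

The second call is why the reduce side of every plan here stays native: false, since its mode is MERGEPARTIAL rather than HASH.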
key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + 
className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +MXGD +NULL +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select 
key, count(key) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b_nonull + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +MXGD +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key 
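All of these plans share the same two-stage shape: a map-side Group By in HASH mode emits partial counts, and the reduce-side Group By in MERGEPARTIAL mode sums the partials for each key. A sketch of that merge step, with invented per-mapper partial counts:

import java.util.*;

public class MergePartialSketch {
    public static void main(String[] args) {
        // Invented partial counts from two mappers over the same key space.
        Map<String, Long> mapper1 = Map.of("WXHJ", 3L, "PXLD", 1L);
        Map<String, Long> mapper2 = Map.of("WXHJ", 2L, "UA", 1L);
        Map<String, Long> merged = new TreeMap<>();
        for (Map<String, Long> partial : List.of(mapper1, mapper2)) {
            // MERGEPARTIAL: sum the partial counts per key.
            partial.forEach((k, v) -> merged.merge(k, v, Long::sum));
        }
        merged.forEach((k, v) -> System.out.println(k + "\t" + v));
    }
}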
from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 0 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: 
VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 6 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date) + outputColumnNames: key, s_date + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_date) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5402 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, 
count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 5 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_timestamp (type: timestamp) + outputColumnNames: key, s_timestamp + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_timestamp) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) 
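The count(s_date) and count(s_timestamp) plans use a third variant, VectorGroupByHashStringKeySingleCountColumnOperator: the count for a group advances only when the aggregated column is non-NULL, which is why some per-key counts above fall below the count(*) figures and keys like BB and ZNOUDCR can show 0. A sketch under that semantics, with invented rows standing in for the test data:

import java.util.*;

public class CountColumnSketch {
    public static void main(String[] args) {
        // Invented (key, s_date) rows; a group whose values are all NULL
        // still appears in the output, with a count of 0.
        String[][] rows = { {"BB", null}, {"SDA", "2001-01-01"},
                            {"SDA", null}, {"BB", null} };
        Map<String, Long> counts = new TreeMap<>();
        for (String[] row : rows) {
            counts.putIfAbsent(row[0], 0L);  // the group exists even if every value is NULL
            if (row[1] != null) {
                counts.merge(row[0], 1L, Long::sum); // count(col): skip NULL values
            }
        }
        counts.forEach((k, v) -> System.out.println(k + "\t" + v)); // BB 0, SDA 1
    }
}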
+ Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 4 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: 
groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 23 Data size: 4141 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +NULL +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 
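A note on the counts in these results: count(s_timestamp) skips NULL values while count(*) counts every row, which is why keys whose timestamps are all NULL (BB, DD, IWEZJHKE above) report 0 for count(s_timestamp) yet still appear as groups. A minimal HiveQL sketch contrasting the two on the same table:

select key,
       count(*)           as all_rows,    -- every row, NULLs included
       count(s_timestamp) as non_null_ts  -- NULL timestamps are skipped
from groupby_string_1c
group by key;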
+QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key 
from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c_nonull + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE 
Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a +POSTHOOK: query: CREATE TABLE groupby_serialize_1a STORED 
AS ORC AS SELECT * FROM groupby_serialize_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a +POSTHOOK: Lineage: groupby_serialize_1a.key SIMPLE [(groupby_serialize_1a_txt)groupby_serialize_1a_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: Lineage: groupby_serialize_1a_nonull.key SIMPLE [(groupby_serialize_1a_nonull_txt)groupby_serialize_1a_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b +POSTHOOK: query: CREATE 
TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b +POSTHOOK: Lineage: groupby_serialize_1b.c_double SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_smallint SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_string SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.key SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_double SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_smallint SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_string SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.key SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: 
query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a 
root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +NULL +PREHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 
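All of the groupby_serialize_* tables queried here are built with the same staging pattern: a delimited text table is loaded from data/files, then copied to ORC with CTAS so the vectorized ORC reader is exercised. A condensed sketch of that pattern, using a hypothetical table name t:

-- Staging table over the comma-delimited text file.
CREATE TABLE t_txt(key timestamp)
row format delimited fields terminated by ',';
LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE t_txt;
-- ORC copy actually used by the vectorized group-by tests.
CREATE TABLE t STORED AS ORC AS SELECT * FROM t_txt;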
+2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a_nonull + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key 
order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + 
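The explains over the serialize tables pair each query shape with a distinct native operator, as printed in their Group By Vectorization sections. A sketch of the four shapes and the class each explain reports (timestamp keys get the SerializeKey variants; the string-key plans earlier show the analogous StringKey naming):

-- select key from groupby_serialize_1a group by key                    -> VectorGroupByHashSerializeKeyDuplicateReductionOperator
-- select key, count(key) from groupby_serialize_1a group by key        -> VectorGroupByHashSerializeKeySingleCountKeyOperator
-- select key, count(*) from groupby_serialize_1a group by key          -> VectorGroupByHashSerializeKeySingleCountStarOperator
-- select key, count(c_smallint) from groupby_serialize_1b group by key -> VectorGroupByHashSerializeKeySingleCountColumnOperator
explain vectorization operator
select key, count(*) from groupby_serialize_1a group by key;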
Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 
1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File 
Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: 
QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_smallint (type: smallint) + outputColumnNames: key, c_smallint + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_smallint) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was 
here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_string (type: string) + outputColumnNames: key, c_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_string) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select 
Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +NULL +PREHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 
+2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 
15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 
+2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 
15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b_nonull + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: 
VectorGroupByHashSerializeKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT 
DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: s (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 7:string + native: true + Group By Operator + aggregations: count(s) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by 
s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string), ts (type: timestamp) + outputColumnNames: s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: s (type: string) + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 7:string + native: true + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: s (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + 
keyExpressions: col 7:string + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: 
VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: ts (type: timestamp) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp + native: true + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 
42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: d (type: double), ts (type: timestamp) + outputColumnNames: d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: ts (type: timestamp) + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp + native: true + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By 
Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + 
Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: ts (type: timestamp) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 8:timestamp + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: dec (type: decimal(4,2)) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64 + native: true + Group By Operator + aggregations: count(dec) + Group By Vectorization: + className: VectorGroupByHashDecimal64KeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: dec (type: decimal(4,2)) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64 + native: true + Group By Operator + aggregations: count(bin) + Group By Vectorization: + className: VectorGroupByHashDecimal64KeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 
+POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: dec (type: decimal(4,2)) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 9:decimal(4,2)/DECIMAL_64 + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashDecimal64KeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + 
className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: i (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int + native: true + Group By Operator + aggregations: 
count(i) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + 
File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: i (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int + native: true + Group By Operator + aggregations: count(b) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs 
+ Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: i (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int + native: true + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: i (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + top n: 10 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 2:int + native: true + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink +
+PREHOOK: query: select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here ####
+POSTHOOK: query: select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 +65537 +65538 +65539 +65540 +65541 +65542 +65543 +65544 +65545
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out index edc1fae..38e24fb 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
@@ -73,10 +73,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash
@@ -492,10 +493,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSingleKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double)
@@ -520,7 +522,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext:
@@ -561,10 +563,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:double) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash
@@ -674,10 +677,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:boolean - native: false +
native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: true (type: boolean)
@@ -702,7 +706,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext:
@@ -749,10 +753,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountStarOperator + countAggregation: COUNT_STAR groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out index d3ba688..b85241a 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
@@ -82,10 +82,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyCountColumnOperator + countAggregation: COUNT_COLUMN groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash
diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index e0d533f..b9aa602 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out
@@ -170,6 +170,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: s_store_id (type: string), 0L (type: bigint)
@@ -301,6 +303,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate
IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index 3537c40..fd73728 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -320,10 +320,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -348,7 +349,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 9238bc7..bcfbe31 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -145,6 +145,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -297,6 +299,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -536,6 +540,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -942,6 +948,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out index 3d78cfe..6b35ba5 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out @@ -78,7 +78,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 960f5f5..ef0f7a9 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -5873,9 +5873,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5889,7 +5890,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5987,9 +5988,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6003,7 +6005,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6103,9 +6105,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By 
Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6119,7 +6122,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6214,9 +6217,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6230,7 +6234,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6333,9 +6337,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6349,7 +6354,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6416,9 +6421,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6432,7 +6438,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -6530,9 +6536,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM 
word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6546,7 +6553,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -6641,9 +6648,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6657,7 +6665,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -6782,9 +6790,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6798,7 +6807,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6909,9 +6918,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -6925,7 +6935,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -7052,9 +7062,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate 
Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator
@@ -7068,7 +7079,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -7176,9 +7187,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator
@@ -7192,7 +7204,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -7460,7 +7472,23 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization:
@@ -7483,9 +7511,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator
@@ -7499,7 +7528,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -7648,6 +7677,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator
@@ -7676,6 +7706,18 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization:
@@ -7684,7 +7726,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -7881,6 +7923,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator
@@ -7909,6 +7952,18 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization:
@@ -7917,7 +7972,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -8046,6 +8101,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true Select Vectorization: className: VectorSelectOperator native: true
@@ -8058,6 +8114,18 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization:
@@ -8066,7 +8134,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4
@@ -8205,6 +8273,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true Select Vectorization: className: VectorSelectOperator native: true
@@ -8217,6 +8286,18 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization:
@@ -8225,7 +8306,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5
@@ -8381,6 +8462,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true Select Vectorization: className: VectorSelectOperator native: true
@@ -8393,6 +8475,18 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization:
@@ -8401,7 +8495,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5
@@ -8512,9 +8606,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator
@@ -8528,7 +8623,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4
@@ -8677,9 +8772,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkStringOperator
@@ -8693,7 +8789,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true
@@ -8806,9 +8902,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -8831,7 +8937,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -8979,9 +9085,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -9004,7 +9120,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -9154,9 +9270,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -9179,7 +9305,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -9328,9 +9454,19 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int, col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col1 (type: int), _col1 (type: int) mode: hash
@@ -9353,7 +9489,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -9506,9 +9642,19 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int, col 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash
@@ -9531,7 +9677,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -9626,9 +9772,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -9651,7 +9807,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2
@@ -9803,9 +9959,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int, col 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash
@@ -9828,7 +9994,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2
@@ -9977,9 +10143,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -10002,7 +10178,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2
@@ -10199,9 +10375,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -10224,7 +10410,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -10385,9 +10571,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -10410,7 +10606,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -10616,9 +10812,19 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -10641,7 +10847,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -10799,9 +11005,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int, col 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash
@@ -10824,7 +11040,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -10997,9 +11213,15 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -11022,6 +11244,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true
@@ -11240,6 +11463,41 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization:
@@ -11248,6 +11506,8 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true usesVectorUDFAdaptor: false vectorized: true
@@ -11274,9 +11534,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -11299,7 +11569,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -11464,8 +11734,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator
@@ -11498,7 +11782,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -11698,8 +11982,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator
@@ -11732,7 +12030,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2
@@ -11945,8 +12243,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator
@@ -11979,7 +12291,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4
@@ -12172,8 +12484,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator
@@ -12206,7 +12532,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5
@@ -12430,8 +12756,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator
@@ -12464,7 +12804,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5
@@ -12712,9 +13052,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash
@@ -12737,7 +13087,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4
@@ -12951,9 +13301,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: string) mode: hash
@@ -12976,7 +13336,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true
@@ -13089,9 +13449,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true
more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -13114,7 +13484,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -13259,9 +13629,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -13284,7 +13664,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -13431,9 +13811,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... 
pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -13456,7 +13846,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -13602,9 +13992,19 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int, col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col1 (type: int), _col1 (type: int) mode: hash @@ -13627,7 +14027,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -13777,9 +14177,19 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int, col 1:string + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... 
pull vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -13802,7 +14212,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -13897,9 +14307,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -13922,7 +14342,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -14071,9 +14491,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int, col 1:string + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... 
pull vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -14096,7 +14526,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -14242,9 +14672,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -14267,7 +14707,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -14458,9 +14898,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -14483,7 +14933,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -14641,9 +15091,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... 
more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -14666,7 +15126,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -14866,9 +15326,19 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -14891,7 +15361,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -15046,9 +15516,15 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int, col 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... 
more vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -15071,7 +15547,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -15238,9 +15714,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -15263,6 +15749,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +<<<<<<< HEAD allNative: false usesVectorUDFAdaptor: false vectorized: true @@ -15475,6 +15962,41 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD +======= + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: 
NONE +>>>>>>> 0c8835c... pull Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -15483,6 +16005,8 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +======= +>>>>>>> 1a04fe1... more allNative: true usesVectorUDFAdaptor: false vectorized: true @@ -15509,9 +16033,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -15534,7 +16068,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -15699,8 +16233,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD Select Operator expressions: key (type: int) +======= + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash +>>>>>>> 1a04fe1... 
more outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -15733,7 +16281,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -15932,8 +16480,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD Select Operator expressions: key (type: int) +======= + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash +>>>>>>> 1a04fe1... more outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -15966,7 +16528,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -16172,8 +16734,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD Select Operator expressions: key (type: int) +======= + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash +>>>>>>> 1a04fe1... more outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -16206,7 +16782,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -16396,8 +16972,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD Select Operator expressions: key (type: int) +======= + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash +>>>>>>> 1a04fe1... 
more outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -16430,7 +17020,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -16651,8 +17241,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD Select Operator expressions: key (type: int) +======= + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash +>>>>>>> 1a04fe1... more outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -16685,7 +17289,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -16927,9 +17531,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -16952,7 +17566,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -17163,9 +17777,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 1:string + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... 
more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: string) mode: hash @@ -17188,7 +17812,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true @@ -17302,9 +17926,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -17327,7 +17961,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -17473,9 +18107,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... 
pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -17498,7 +18142,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -17646,9 +18290,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -17671,7 +18325,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -17818,9 +18472,19 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int, col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... 
pull vectorProcessingMode: HASH keys: _col1 (type: int), _col1 (type: int) mode: hash @@ -17843,7 +18507,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -17994,9 +18658,19 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int, col 1:string + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -18019,7 +18693,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -18114,9 +18788,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... 
pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -18139,7 +18823,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -18289,9 +18973,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int, col 1:string + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -18314,7 +19008,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -18461,9 +19155,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... 
pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -18486,7 +19190,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -18679,9 +19383,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -18704,7 +19418,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -18863,9 +19577,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -18888,7 +19612,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -19090,9 +19814,19 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... 
more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -19115,7 +19849,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -19271,9 +20005,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int, col 1:string + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -19296,7 +20040,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -19465,9 +20209,16 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -19490,6 +20241,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +<<<<<<< HEAD allNative: false usesVectorUDFAdaptor: false vectorized: true @@ -19519,6 +20271,9 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... 
pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -19712,6 +20467,8 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +======= +>>>>>>> 1a04fe1... more allNative: true usesVectorUDFAdaptor: false vectorized: true @@ -19738,9 +20495,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH +<<<<<<< HEAD native: false +======= + keyExpressions: col 0:int + native: true +<<<<<<< HEAD + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 1a04fe1... more +======= + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true +>>>>>>> 0c8835c... pull vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -19763,7 +20530,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -19928,8 +20695,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD Select Operator expressions: key (type: int) +======= + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash +>>>>>>> 1a04fe1... 
more outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -19962,7 +20743,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -20162,8 +20943,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD Select Operator expressions: key (type: int) +======= + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash +>>>>>>> 1a04fe1... more outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -20196,7 +20991,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -20405,8 +21200,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD Select Operator expressions: key (type: int) +======= + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash +>>>>>>> 1a04fe1... more outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -20439,7 +21248,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -20630,8 +21439,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +<<<<<<< HEAD Select Operator expressions: key (type: int) +======= + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash +>>>>>>> 1a04fe1... 
outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -20664,7 +21487,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -20886,8 +21709,22 @@ STAGE PLANS: native: true predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator @@ -20920,7 +21757,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -21164,9 +22001,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: int) mode: hash @@ -21189,7 +22036,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -21401,9 +22248,19 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH - native: false + keyExpressions: col 1:string + native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH keys: _col0 (type: string) mode: hash @@ -21426,7 +22283,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 3364035..511e98b 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -43,10 +43,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -70,7 +71,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -142,10 +143,11 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: 
[] keys: l_partkey (type: int) @@ -344,7 +347,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -418,10 +421,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 17:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) @@ -445,7 +449,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_null_projection.q.out ql/src/test/results/clientpositive/llap/vector_null_projection.q.out index 1842ede..78b75b80 100644 --- ql/src/test/results/clientpositive/llap/vector_null_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_null_projection.q.out @@ -156,10 +156,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 2:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: true (type: boolean) @@ -183,7 +184,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -207,10 +208,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 2:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: true (type: boolean) @@ -234,7 +236,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true 
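Each plan prints the exact predicate list that gated the native path as nativeConditionsMet and, on fallback, nativeConditionsNotMet. A hedged sketch of how such a checklist could be accumulated (the class and method names here are illustrative, not the Vectorizer's API):

import java.util.ArrayList;
import java.util.List;

// Sketch only: collect human-readable condition strings like the ones in these plans.
final class NativeConditionChecklist {
  final List<String> met = new ArrayList<>();
  final List<String> notMet = new ArrayList<>();

  // Records "<description> IS true" or "<description> IS false" in the matching bucket.
  void check(String description, boolean holds) {
    (holds ? met : notMet).add(description + " IS " + holds);
  }

  // The native operator is chosen only when nothing landed in notMet.
  boolean allMet() {
    return notMet.isEmpty();
  }
}

Read this way, a vertex whose only failed check is the aggregate-shape condition prints a single nativeConditionsNotMet entry, which is exactly the pattern in the fallback hunks further down.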
usesVectorUDFAdaptor: false vectorized: true Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index fc9c453..0a62be3 100644 --- ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -272,6 +274,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out index 5e7e28d..e090053 100644 --- ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out @@ -1067,11 +1067,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 9:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: double) @@ -1096,7 +1096,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1176,12 +1176,27 @@ STAGE PLANS: alias: nested_tbl_1 Pruned Column Paths: s1.f3 Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 10] + selectExpressions: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 8:struct, VectorUDFStructField(col 9:struct, col 0:int)(children: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 9:struct) -> 10:int Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + 
Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + keyExpressions: col 8:struct + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: struct) mode: hash outputColumnNames: _col0, _col1 @@ -1190,16 +1205,23 @@ STAGE PLANS: key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: _col0 (type: struct) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1266,12 +1288,27 @@ STAGE PLANS: alias: nested_tbl_1 Pruned Column Paths: s1.f3 Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: s1.f3 (type: struct), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 10] + selectExpressions: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 8:struct, VectorUDFStructField(col 9:struct, col 0:int)(children: VectorUDFStructField(col 1:struct,f6:int>, col 2:int) -> 9:struct) -> 10:int Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + keyExpressions: col 8:struct + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: struct) mode: hash outputColumnNames: _col0, _col1 @@ -1280,16 +1317,23 @@ STAGE PLANS: key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: _col0 (type: struct) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 316 Basic stats: 
COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1765,10 +1809,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int, col 12:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: boolean) @@ -1792,7 +1837,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -2228,11 +2273,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 11:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 10:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2257,7 +2302,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2367,11 +2412,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2396,7 +2441,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2506,11 +2551,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By 
Vectorization: - aggregators: VectorUDAFCount(col 12:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 11:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2535,7 +2580,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2653,7 +2698,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + notVectorizedReason: exception: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo cannot be cast to org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo stack trace: org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getStructFieldIndex(VectorizationContext.java:903), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getGenericUDFStructField(VectorizationContext.java:891), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:871), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:763), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:751), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doVectorizeGroupByOperatorPreparation(Vectorizer.java:4884), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperator(Vectorizer.java:5562), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChild(Vectorizer.java:954), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChildren(Vectorizer.java:840), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperatorTree(Vectorizer.java:807), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.access$2300(Vectorizer.java:267), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:2053), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:2005), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapWork(Vectorizer.java:1978), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.convertMapWork(Vectorizer.java:1142), ... 
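Note what the LIST-keyed queries in this file now expect, here and in the matching hunk below: a raw ClassCastException out of VectorizationContext.getStructFieldIndex, where the golden output previously carried the readable reason "Vectorizing complex type LIST not supported". A minimal sketch of the kind of type guard that would keep the clean message (an assumption about shape, not the patch's actual code):

import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

// Sketch only: fail with a vectorizer-style reason instead of letting the cast throw.
final class StructFieldGuard {
  static StructTypeInfo expectStruct(TypeInfo typeInfo) throws Exception {
    if (!(typeInfo instanceof StructTypeInfo)) {
      // e.g. "Vectorizing complex type LIST not supported"
      throw new Exception("Vectorizing complex type "
          + typeInfo.getCategory() + " not supported");
    }
    return (StructTypeInfo) typeInfo;
  }
}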
vectorized: false Reducer 2 Execution mode: llap @@ -2746,11 +2791,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 13:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 12:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2775,7 +2820,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2893,7 +2938,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + notVectorizedReason: exception: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo cannot be cast to org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo stack trace: org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getStructFieldIndex(VectorizationContext.java:903), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getGenericUDFStructField(VectorizationContext.java:891), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:871), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:763), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:751), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doVectorizeGroupByOperatorPreparation(Vectorizer.java:4884), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperator(Vectorizer.java:5562), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChild(Vectorizer.java:954), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChildren(Vectorizer.java:840), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperatorTree(Vectorizer.java:807), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.access$2300(Vectorizer.java:267), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:2053), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:2005), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapWork(Vectorizer.java:1978), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.convertMapWork(Vectorizer.java:1142), ... 
vectorized: false Reducer 2 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out index 0de0c33..5c295fe 100644 --- ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out @@ -142,11 +142,11 @@ STAGE PLANS: Group By Operator aggregations: max(b) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 3:bigint) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyLongMaxColumnOperator groupByMode: HASH keyExpressions: col 7:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) @@ -171,7 +171,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 3caa979..d61bbfd 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -748,6 +748,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index 9f7f174..aa53258 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -322,6 +322,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 0f40378..36a6957 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -287,6 +287,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key 
for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -504,6 +506,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> 3:decimal(15,2), ConvertDecimal64ToDecimal(col 1:decimal(15,2)/DECIMAL_64) -> 4:decimal(15,2) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) @@ -1591,6 +1595,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1808,6 +1814,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(7,2)/DECIMAL_64) -> 3:decimal(7,2), ConvertDecimal64ToDecimal(col 1:decimal(7,2)/DECIMAL_64) -> 4:decimal(7,2) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out index bf8e2d8..f23935a 100644 --- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out @@ -116,6 +116,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -286,6 +288,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -456,6 +460,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -613,6 +619,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -770,6 +778,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 57a1ea7..e261e98 100644 --- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -449,11 +449,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -478,7 +478,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1380,11 +1380,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] 
keys: fl_date (type: date) @@ -1409,7 +1409,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2335,11 +2335,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:timestamp - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_time (type: timestamp) @@ -2364,7 +2364,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2874,11 +2874,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -2903,7 +2903,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3805,11 +3805,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -3834,7 +3834,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4760,11 +4760,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:timestamp - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction 
IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_time (type: timestamp) @@ -4789,7 +4789,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out index 8bec4aa..e6b2d34 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: name (type: string), age (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out index f65712a..79d3414 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out @@ -77,6 +77,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out index 2f604bb..f2a11ba 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out @@ -105,10 +105,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_retry_failure.q.out ql/src/test/results/clientpositive/llap/vector_retry_failure.q.out index c2342b2..064c0c3 100644 --- 
ql/src/test/results/clientpositive/llap/vector_retry_failure.q.out +++ ql/src/test/results/clientpositive/llap/vector_retry_failure.q.out @@ -55,10 +55,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: int) @@ -82,7 +83,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out index 0b82230..ee487a0 100644 --- ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out @@ -120,6 +120,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -327,6 +329,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index 39578d2..bf11819 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -363,10 +363,11 @@ STAGE PLANS: native: true Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 20:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -391,7 +392,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true 
usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ql/src/test/results/clientpositive/llap/vector_topnkey.q.out index 3d0793b..5be5357 100644 --- ql/src/test/results/clientpositive/llap/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/llap/vector_topnkey.q.out @@ -55,6 +55,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -243,10 +245,11 @@ STAGE PLANS: native: true Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) @@ -272,7 +275,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 26d695b..90b54f1 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -2791,6 +2791,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -2933,6 +2935,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index 7a50163..aea0dd3 100644 --- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -57,11 +57,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 7:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + 
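All of these native conversions hang off one switch, hive.vectorized.execution.groupby.native.enabled, which every nativeConditionsMet line above reports as true. A hedged sketch of reading it through the plain Hadoop Configuration API (the property name comes from the plan output; the helper class is illustrative):

import org.apache.hadoop.conf.Configuration;

// Sketch only: gate the native group-by path on the flag printed in these plans.
final class NativeGroupByFlag {
  static boolean isEnabled(Configuration conf) {
    // Default true, matching "hive.vectorized.execution.groupby.native.enabled IS true".
    return conf.getBoolean("hive.vectorized.execution.groupby.native.enabled", true);
  }
}

Setting the property to false (set hive.vectorized.execution.groupby.native.enabled=false; in a q file) should push every one of these operators back to the generic VectorGroupByOperator.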
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -86,7 +86,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index ef1e653..afca706 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -267,6 +267,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, col 2:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_name (type: string), p_mfgr (type: string), p_size (type: int) @@ -517,6 +519,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, col 2:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for min not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_name (type: string), p_mfgr (type: string), p_size (type: int) @@ -4011,6 +4015,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, col 2:string, col 5:int, col 7:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) @@ -4698,6 +4704,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) @@ -6251,6 +6259,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, col 2:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, 
diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out
index ef1e653..afca706 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out
@@ -267,6 +267,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 1:string, col 2:string, col 5:int
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for min not implemented]" IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 keys: p_name (type: string), p_mfgr (type: string), p_size (type: int)
@@ -517,6 +519,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 1:string, col 2:string, col 5:int
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for min not implemented]" IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 keys: p_name (type: string), p_mfgr (type: string), p_size (type: int)
@@ -4011,6 +4015,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 1:string, col 2:string, col 5:int, col 7:double
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1]
 keys: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double)
@@ -4698,6 +4704,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 2:string, col 3:string
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 keys: p_mfgr (type: string), p_brand (type: string)
@@ -6251,6 +6259,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 1:string, col 2:string, col 5:int
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for min not implemented]" IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 keys: p_name (type: string), p_mfgr (type: string), p_size (type: int)
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
index 6660d73..4c2b7f9 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
@@ -50,6 +50,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 0:string
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 keys: key (type: string)
@@ -291,6 +293,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 6:int
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1]
 keys: _col0 (type: int)
@@ -527,6 +531,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 0:string, col 1:string
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3]
 keys: _col0 (type: string), _col1 (type: string)
diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index 5e95f39..6befb76 100644
--- ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -54,6 +54,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3]
 mode: hash
@@ -234,6 +236,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -563,6 +567,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3]
 mode: hash
@@ -743,6 +749,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -1072,6 +1080,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3]
 mode: hash
@@ -1252,6 +1262,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -1628,6 +1640,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
 mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorization_1.q.out ql/src/test/results/clientpositive/llap/vectorization_1.q.out
index a0b9d23..096839f 100644
--- ql/src/test/results/clientpositive/llap/vectorization_1.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_1.q.out
@@ -88,6 +88,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorization_12.q.out ql/src/test/results/clientpositive/llap/vectorization_12.q.out
index 61ff039..425935a 100644
--- ql/src/test/results/clientpositive/llap/vectorization_12.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_12.q.out
@@ -112,6 +112,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
 keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean)
diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index 398cb56..d95bfaa 100644
--- ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -114,6 +114,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
@@ -468,6 +470,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out
index c0995cc..76624b1 100644
--- ql/src/test/results/clientpositive/llap/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out
@@ -114,6 +114,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
 keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean)
diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out
index 441097a..939404a 100644
--- ql/src/test/results/clientpositive/llap/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out
@@ -110,6 +110,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out
index d80c750..5c7c0bf 100644
--- ql/src/test/results/clientpositive/llap/vectorization_16.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out
@@ -87,6 +87,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 6:string, col 5:double, col 8:timestamp
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3]
 keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
diff --git ql/src/test/results/clientpositive/llap/vectorization_2.q.out ql/src/test/results/clientpositive/llap/vectorization_2.q.out
index 6a0a81b..a6a4a77 100644
--- ql/src/test/results/clientpositive/llap/vectorization_2.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_2.q.out
@@ -92,6 +92,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorization_3.q.out ql/src/test/results/clientpositive/llap/vectorization_3.q.out
index 1e120f4..9f1fcb1 100644
--- ql/src/test/results/clientpositive/llap/vectorization_3.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_3.q.out
@@ -97,6 +97,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
 mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorization_4.q.out ql/src/test/results/clientpositive/llap/vectorization_4.q.out
index c34d8a9..496f3dc 100644
--- ql/src/test/results/clientpositive/llap/vectorization_4.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_4.q.out
@@ -92,6 +92,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4]
 mode: hash
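The vectorization_1 through vectorization_4 results above all fail the same check: with several aggregates in one GROUP BY, the "Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate" condition is false and the generic VectorGroupByOperator stays in place. A hedged contrast over a hypothetical table t(k int, v bigint) — not one of the test tables:

    -- many aggregates in one GROUP BY: nativeConditionsNotMet, generic operator
    select k, min(v), max(v), sum(v), count(v) from t group by k;

    -- one MIN over a word-size type: the shape the condition text names as eligible,
    -- though the "Has issues [Multi-key for min not implemented]" notes above show
    -- the multi-key variants are not yet covered
    select k, min(v) from t group by k;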
diff --git ql/src/test/results/clientpositive/llap/vectorization_5.q.out ql/src/test/results/clientpositive/llap/vectorization_5.q.out
index 9b82b34..681d616 100644
--- ql/src/test/results/clientpositive/llap/vectorization_5.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_5.q.out
@@ -85,6 +85,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4]
 mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out
index d80c750..5c7c0bf 100644
--- ql/src/test/results/clientpositive/llap/vectorization_9.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out
@@ -87,6 +87,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 6:string, col 5:double, col 8:timestamp
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3]
 keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
index 0e8f42d..4d0ff7d 100644
--- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
@@ -292,6 +292,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 0:tinyint
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1]
 keys: _col0 (type: tinyint)
@@ -500,10 +502,11 @@ STAGE PLANS:
 native: true
 Group By Operator
 Group By Vectorization:
- className: VectorGroupByOperator
+ className: VectorGroupByHashLongKeyDuplicateReductionOperator
 groupByMode: HASH
 keyExpressions: col 0:tinyint
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: []
 keys: ctinyint (type: tinyint)
@@ -530,7 +533,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 rowBatchContext:
@@ -659,10 +662,11 @@ STAGE PLANS:
 Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 Group By Vectorization:
- className: VectorGroupByOperator
+ className: VectorGroupByHashMultiKeyDuplicateReductionOperator
 groupByMode: HASH
 keyExpressions: col 0:tinyint, col 5:double
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: []
 keys: ctinyint (type: tinyint), cdouble (type: double)
@@ -688,7 +692,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 rowBatchContext:
@@ -899,6 +903,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 5:double
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 keys: cdouble (type: double)
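vectorization_limit.q.out above is where the duplicate-reduction specializations first appear: a hash-mode GROUP BY with keys but no aggregations at all now compiles to VectorGroupByHashLongKeyDuplicateReductionOperator (a single long-category key) or VectorGroupByHashMultiKeyDuplicateReductionOperator (several keys), and the enclosing Map vertex becomes allNative: true. Sketches of the query shape that selects each, matching the keys shown in the plans but not necessarily the literal test queries:

    -- single tinyint key, no aggregates: long-key duplicate reduction
    select ctinyint from alltypesorc group by ctinyint;

    -- two keys, no aggregates: multi-key duplicate reduction
    select ctinyint, cdouble from alltypesorc group by ctinyint, cdouble;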
diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out
index a3e1b2c..bb93d37 100644
--- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out
@@ -44,6 +44,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index a59a586..ddb679f 100644
--- ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -119,6 +119,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
 mode: hash
@@ -382,6 +384,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
 mode: hash
@@ -637,6 +641,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
 mode: hash
@@ -871,6 +877,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 mode: hash
@@ -2211,6 +2219,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 1:smallint
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
 keys: _col0 (type: smallint)
@@ -2489,6 +2499,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 5:double
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4]
 keys: _col0 (type: double)
@@ -2811,6 +2823,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 8:timestamp, col 6:string
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
 keys: _col0 (type: timestamp), _col1 (type: string)
@@ -3214,6 +3228,8 @@ STAGE PLANS:
 groupByMode: HASH
 keyExpressions: col 10:boolean
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
 keys: _col0 (type: boolean)
@@ -3448,10 +3464,10 @@ STAGE PLANS:
 Group By Operator
 aggregations: count()
 Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
+ className: VectorGroupByHashMultiKeySingleCountStarOperator
 groupByMode: HASH
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -3473,7 +3489,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -3563,10 +3579,10 @@ STAGE PLANS:
 Group By Operator
 aggregations: count(i)
 Group By Vectorization:
- aggregators: VectorUDAFCount(col 0:int) -> bigint
- className: VectorGroupByOperator
+ className: VectorGroupByHashMultiKeySingleCountColumnOperator
 groupByMode: HASH
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -3588,7 +3604,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -3750,10 +3766,10 @@ STAGE PLANS:
 Group By Operator
 aggregations: count()
 Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
+ className: VectorGroupByHashMultiKeySingleCountStarOperator
 groupByMode: HASH
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -3775,7 +3791,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -3865,10 +3881,10 @@ STAGE PLANS:
 Group By Operator
 aggregations: count(ctinyint)
 Group By Vectorization:
- aggregators: VectorUDAFCount(col 0:tinyint) -> bigint
- className: VectorGroupByOperator
+ className: VectorGroupByHashMultiKeySingleCountColumnOperator
 groupByMode: HASH
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -3890,7 +3906,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -3980,10 +3996,10 @@ STAGE PLANS:
 Group By Operator
 aggregations: count(cint)
 Group By Vectorization:
- aggregators: VectorUDAFCount(col 2:int) -> bigint
- className: VectorGroupByOperator
+ className: VectorGroupByHashMultiKeySingleCountColumnOperator
 groupByMode: HASH
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -4005,7 +4021,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -4095,10 +4111,10 @@ STAGE PLANS:
 Group By Operator
 aggregations: count(cfloat)
 Group By Vectorization:
- aggregators: VectorUDAFCount(col 4:float) -> bigint
- className: VectorGroupByOperator
+ className: VectorGroupByHashMultiKeySingleCountColumnOperator
 groupByMode: HASH
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -4120,7 +4136,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -4210,10 +4226,10 @@ STAGE PLANS:
 Group By Operator
 aggregations: count(cstring1)
 Group By Vectorization:
- aggregators: VectorUDAFCount(col 6:string) -> bigint
- className: VectorGroupByOperator
+ className: VectorGroupByHashMultiKeySingleCountColumnOperator
 groupByMode: HASH
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -4235,7 +4251,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -4325,10 +4341,10 @@ STAGE PLANS:
 Group By Operator
 aggregations: count(cboolean1)
 Group By Vectorization:
- aggregators: VectorUDAFCount(col 10:boolean) -> bigint
- className: VectorGroupByOperator
+ className: VectorGroupByHashMultiKeySingleCountColumnOperator
 groupByMode: HASH
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0]
 mode: hash
@@ -4350,7 +4366,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
index 15cd648..42f2370 100644
--- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
@@ -1262,6 +1262,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3]
 mode: hash
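The vectorization_short_regress.q.out hunks above show the other two specializations: a lone count() compiles to VectorGroupByHashMultiKeySingleCountStarOperator and a lone count(col) to VectorGroupByHashMultiKeySingleCountColumnOperator, replacing the separate VectorUDAFCountStar / VectorUDAFCount aggregator line in the plan and flipping the Map work to allNative: true. Query shapes along these lines (sketches, not the literal test queries; the grouping keys are illustrative):

    select count(*) from alltypesorc group by cstring1, cboolean1;         -- count-star form
    select count(ctinyint) from alltypesorc group by cstring1, cboolean1;  -- count-column form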
diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
index 826fc5f..c1959f9 100644
--- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
@@ -73,6 +73,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1]
 mode: hash
@@ -199,10 +201,11 @@ STAGE PLANS:
 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 Group By Vectorization:
- className: VectorGroupByOperator
+ className: VectorGroupByHashLongKeyDuplicateReductionOperator
 groupByMode: HASH
 keyExpressions: col 2:int
- native: false
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS true, Group By Mode HASH IS true, No Grouping Sets IS true
 vectorProcessingMode: HASH
 projectedOutputColumnNums: []
 keys: cint (type: int)
@@ -227,7 +230,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 rowBatchContext:
@@ -280,6 +283,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2, 3]
 mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index 68ffb3c..2449fc9 100644
--- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -84,7 +84,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -306,7 +306,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -627,7 +627,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 6
@@ -671,7 +671,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -1080,7 +1080,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -1396,7 +1396,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -1710,7 +1710,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -1870,7 +1870,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -2333,7 +2333,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -2470,7 +2470,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -2819,7 +2819,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -2979,7 +2979,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -3094,7 +3094,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 4
@@ -3259,7 +3259,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -3403,7 +3403,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 6
@@ -3447,7 +3447,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -4303,7 +4303,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 10
@@ -4359,7 +4359,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 7
@@ -4652,7 +4652,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
@@ -4696,7 +4696,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -4827,7 +4827,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
@@ -4871,7 +4871,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 4
@@ -4915,7 +4915,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -5036,7 +5036,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
@@ -5095,7 +5095,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -5213,7 +5213,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
@@ -5257,7 +5257,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -5364,7 +5364,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
@@ -5408,7 +5408,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -5515,7 +5515,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
@@ -5559,7 +5559,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -5674,7 +5674,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -5830,7 +5830,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
@@ -5874,7 +5874,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -5965,7 +5965,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
@@ -6106,7 +6106,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 3
@@ -6205,7 +6205,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
@@ -6249,7 +6249,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 4
@@ -6293,7 +6293,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
@@ -6473,7 +6473,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 4
@@ -6580,7 +6580,7 @@ STAGE PLANS:
 inputFormatFeatureSupport: [DECIMAL_64]
 featureSupportInUse: [DECIMAL_64]
 inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
- allNative: false
+ allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
 Map 3
diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
index c22a127..ddfd130 100644
--- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
@@ -52,7 +52,7 @@ STAGE PLANS:
 TableScan
 alias: a
 filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
- Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 499 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -61,7 +61,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:int), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1:int, left 0, right 0), VectorInBloomFilterColDynamicValue))
 predicate: ((key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) and key_int is not null) (type: boolean)
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: key_int (type: int)
 outputColumnNames: _col0
@@ -69,7 +69,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [1]
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
@@ -78,7 +78,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
 Map Vectorization:
@@ -137,6 +137,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2]
 mode: hash
@@ -170,7 +172,7 @@ STAGE PLANS:
 keys:
 0 _col0 (type: int)
 1 _col0 (type: int)
- Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 522 Data size: 1993 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
@@ -289,7 +291,7 @@ STAGE PLANS:
 TableScan
 alias: a
 filterExpr: (key_str is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter)))) (type: boolean)
- Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 499 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -298,7 +300,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 0:string, left NULL, right NULL), VectorInBloomFilterColDynamicValue))
 predicate: ((key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) and key_str is not null) (type: boolean)
- Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 83371 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: key_str (type: string)
 outputColumnNames: _col0
@@ -306,7 +308,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0]
- Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 83371 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
@@ -315,7 +317,7 @@ STAGE PLANS:
 className: VectorReduceSinkStringOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 83371 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
 Map Vectorization:
@@ -374,6 +376,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2]
 mode: hash
@@ -407,7 +411,7 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
- Statistics: Num rows: 522 Data size: 91524 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 522 Data size: 91708 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
@@ -526,7 +530,7 @@ STAGE PLANS:
 TableScan
 alias: a
 filterExpr: (key_str is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter)))) (type: boolean)
- Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 499 Data size: 87584 Basic stats: COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -535,7 +539,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 0:string, left NULL, right NULL), VectorInBloomFilterColDynamicValue))
 predicate: ((key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) and key_str is not null) (type: boolean)
- Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 83371 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: key_str (type: string)
 outputColumnNames: _col0
@@ -543,7 +547,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0]
- Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 83371 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
@@ -552,7 +556,7 @@ STAGE PLANS:
 className: VectorReduceSinkStringOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 83371 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
 Map Vectorization:
@@ -611,6 +615,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2]
 mode: hash
@@ -644,7 +650,7 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string)
 1 _col0 (type: string)
- Statistics: Num rows: 522 Data size: 91524 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 522 Data size: 91708 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
@@ -764,7 +770,7 @@ STAGE PLANS:
 TableScan
 alias: a
 filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter)))) (type: boolean)
- Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 499 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -773,7 +779,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:int), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1:int, left 0, right 0), VectorInBloomFilterColDynamicValue), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1:int, left 0, right 0), VectorInBloomFilterColDynamicValue))
 predicate: ((key_int BETWEEN DynamicValue(RS_10_b_key_int_min) AND DynamicValue(RS_10_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_10_b_key_int_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_11_c_key_int_min) AND DynamicValue(RS_11_c_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_11_c_key_int_bloom_filter))) and key_int is not null) (type: boolean)
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: key_int (type: int)
 outputColumnNames: _col0
@@ -781,7 +787,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [1]
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
@@ -790,7 +796,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
 Map Vectorization:
@@ -849,6 +855,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2]
 mode: hash
@@ -920,6 +928,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2]
 mode: hash
@@ -955,7 +965,7 @@ STAGE PLANS:
 0 _col0 (type: int)
 1 _col0 (type: int)
 2 _col0 (type: int)
- Statistics: Num rows: 1045 Data size: 3977 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1045 Data size: 3986 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
@@ -1104,7 +1114,7 @@ STAGE PLANS:
 TableScan
 alias: a
 filterExpr: (key_str is not null and key_int is not null and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
- Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 499 Data size: 89488 Basic stats: COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -1113,7 +1123,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:int), FilterExprAndExpr(children: FilterStringColumnBetweenDynamicValue(col 0:string, left NULL, right NULL), VectorInBloomFilterColDynamicValue), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1:int, left 0, right 0), VectorInBloomFilterColDynamicValue))
 predicate: ((key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) and (key_str BETWEEN DynamicValue(RS_7_b_key_str_min) AND DynamicValue(RS_7_b_key_str_max) and in_bloom_filter(key_str, DynamicValue(RS_7_b_key_str_bloom_filter))) and key_int is not null and key_str is not null) (type: boolean)
- Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 451 Data size: 80879 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: key_str (type: string), key_int (type: int)
 outputColumnNames: _col0, _col1
@@ -1121,7 +1131,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 451 Data size: 80879 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: int)
 sort order: ++
@@ -1130,7 +1140,7 @@ STAGE PLANS:
 className: VectorReduceSinkMultiKeyOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 450 Data size: 80539 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 451 Data size: 80879 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
 Map Vectorization:
@@ -1189,6 +1199,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2]
 mode: hash
@@ -1217,6 +1229,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2]
 mode: hash
@@ -1250,7 +1264,7 @@ STAGE PLANS:
 keys:
 0 _col0 (type: string), _col1 (type: int)
 1 _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 495 Data size: 88592 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 496 Data size: 88966 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
@@ -1398,7 +1412,7 @@ STAGE PLANS:
 TableScan
 alias: a
 filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
- Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 499 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
 Filter Operator
@@ -1407,7 +1421,7 @@ STAGE PLANS:
 native: true
 predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:int), FilterExprAndExpr(children: FilterLongColumnBetweenDynamicValue(col 1:int, left 0, right 0), VectorInBloomFilterColDynamicValue))
 predicate: ((key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) and key_int is not null) (type: boolean)
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: key_int (type: int)
 outputColumnNames: _col0
@@ -1415,7 +1429,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumnNums: [1]
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
@@ -1424,7 +1438,7 @@ STAGE PLANS:
 className: VectorReduceSinkLongOperator
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
 Map Vectorization:
@@ -1483,6 +1497,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false
 vectorProcessingMode: HASH
 projectedOutputColumnNums: [0, 1, 2]
 mode: hash
@@ -1516,7 +1532,7 @@ STAGE PLANS:
 keys:
 0 _col0 (type: int)
 1 _col0 (type: int)
- Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 522 Data size: 1993 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
@@ -1649,19 +1665,19 @@ STAGE PLANS:
 TableScan
 alias: a
 filterExpr: (key_int is not null and (key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter)))) (type: boolean)
- Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 499 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
 Filter Operator
 predicate: ((key_int BETWEEN DynamicValue(RS_7_b_key_int_min) AND DynamicValue(RS_7_b_key_int_max) and in_bloom_filter(key_int, DynamicValue(RS_7_b_key_int_bloom_filter))) and key_int is not null) (type: boolean)
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: key_int (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 475 Data size: 1812 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
 Map 4
@@ -1706,7 +1722,7 @@ STAGE PLANS:
 keys:
 0 _col0 (type: int)
 1 _col0 (type: int)
- Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 522 Data size: 1993 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: count()
 mode: hash
diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
index 946f37e..91e6783 100644
--- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
@@ -349,6 +349,8 @@ STAGE PLANS:
 className: VectorGroupByOperator
 groupByMode: HASH
 native: false
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No
Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 0c751db..1b1eae5 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -77,6 +77,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index c9b9e81..87888d3 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -210,6 +210,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for max not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -415,6 +417,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for max not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -620,6 +624,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for max not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index d0d13ba..fc61690 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -298,6 +298,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: _col0 (type: tinyint) diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index d52e212..9b2ccc2 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -3752,6 +3752,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 384bf61..7e8e8d8 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -371,6 +373,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -511,6 +515,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 2204f26..cc3bdff 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -1008,6 +1008,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate 
Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1135,6 +1137,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true, Has issues "[Multi-key for sum not implemented]" IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1280,6 +1284,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregate, Duplicate Reduction, or Single MAX, MIN, SUM word-size aggregate IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index 6973da0..a0ee773 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -52,6 +52,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -206,6 +208,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -508,6 +512,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -662,6 +668,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -964,6 +972,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1118,6 +1128,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1467,6 +1479,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out index 8f5c06c..85cd40d 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out @@ -86,6 +86,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out index 8dd930a..37b702e 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out @@ -110,6 +110,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out index 78a2428..fa93484 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate 
Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -441,6 +443,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out index c6d8f3b..435864f 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out index 51bf4e0..32f0b71 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out @@ -108,6 +108,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out index c8ad650..54ef04e 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate 
Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out index 5323ab3..4b9b0b0 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out index 62dd3f5..430c0f8 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out @@ -95,6 +95,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out index 0f544a4..b3d7183 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out index 40205d2..2abcc24 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out @@ -83,6 +83,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out index c8ad650..54ef04e 100644 --- 
ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out index 4c6fbca..ea1e381 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out @@ -249,6 +249,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -401,6 +403,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -541,6 +545,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 5:double native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint), cdouble (type: double) @@ -725,6 +731,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index b156ff4..bb7b3f1 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -161,10 +161,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -185,7 +185,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -363,10 +363,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -387,7 +387,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -753,10 +753,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -777,7 +777,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1102,11 +1102,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 7:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1130,7 +1130,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1238,11 +1238,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 8:boolean - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1266,7 +1266,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -1374,11 +1374,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1402,7 +1402,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1510,11 +1510,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1538,7 +1538,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 5e0d60f..20078be 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -148,6 +148,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index 1cf0724..41fad79a 100644 --- ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1264,10 +1264,11 @@ STAGE 
PLANS: Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 16:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ws_order_number (type: int) @@ -1290,7 +1291,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1317,10 +1318,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_data_types.q.out ql/src/test/results/clientpositive/spark/vector_data_types.q.out index d368d05..62fe312 100644 --- ql/src/test/results/clientpositive/spark/vector_data_types.q.out +++ ql/src/test/results/clientpositive/spark/vector_data_types.q.out @@ -374,6 +374,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index 6e33ead..27bb6b3 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -87,6 +87,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -265,6 +267,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 
(type: int) @@ -477,6 +481,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -674,6 +680,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out index a40484f..86d592b 100644 --- ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out @@ -139,10 +139,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) @@ -165,7 +166,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out index bbce14c..77857f4 100644 --- ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out @@ -145,6 +145,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index efab3f0..73e0c3c 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -197,12 +197,12 @@ PREHOOK: query: select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1a PREHOOK: Input: default@orc_table_2a -PREHOOK: Output: hdfs://### 
HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 PREHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 @@ -250,10 +250,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -274,7 +275,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -373,12 +374,12 @@ PREHOOK: query: select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1a PREHOOK: Input: default@orc_table_2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 PREHOOK: query: CREATE TABLE orc_table_1b(v1 STRING, a INT) STORED AS ORC PREHOOK: type: CREATETABLE @@ -581,12 +582,12 @@ PREHOOK: query: select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three 3 PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -746,12 +747,12 @@ PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_ta PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three 3 3 THREE PREHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -920,12 +921,12 
@@ PREHOOK: query: select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join or PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three 6 15 THREE PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -1093,12 +1094,12 @@ PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three THREE 3 PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -1266,12 +1267,12 @@ PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 three THREE PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 @@ -1439,12 +1440,12 @@ PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three THREE 3 PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 @@ -1612,10 +1613,10 @@ PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 three THREE diff --git 
ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index 9f8dea3..46a364f 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -131,7 +131,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 63d13fa..bf19d25 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -91,10 +91,11 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -115,7 +116,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: @@ -144,10 +145,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -170,7 +172,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -370,10 +372,11 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 17:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) @@ -394,7 +397,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] 
inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: @@ -423,10 +426,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -449,7 +453,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index c35156e..e32080d 100644 --- ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -146,6 +146,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out index a35a2df..9644846 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out @@ -37,11 +37,11 @@ POSTHOOK: Lineage: orc_table_2.v2 SCRIPT [] PREHOOK: query: select * from orc_table_1 PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from orc_table_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL one 1 @@ -51,11 +51,11 @@ two 2 PREHOOK: query: select * from orc_table_2 PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from orc_table_2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 0 ZERO 2 TWO 3 THREE @@ -204,12 +204,12 @@ PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer j PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1 PREHOOK: Input: default@orc_table_2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1 POSTHOOK: Input: default@orc_table_2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here 
#### NULL NULL NULL NULL NULL NULL one 1 NULL NULL @@ -358,12 +358,12 @@ PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1 PREHOOK: Input: default@orc_table_2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1 POSTHOOK: Input: default@orc_table_2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL 0 ZERO NULL NULL 4 FOUR NULL NULL NULL diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index e8dc744..4f70c98 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -89,11 +89,11 @@ POSTHOOK: Lineage: small_alltypesorc4a.ctinyint SIMPLE [] PREHOOK: query: select * from small_alltypesorc1a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL @@ -102,11 +102,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc2a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -115,24 +115,24 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a -POSTHOOK: Output: hdfs://### HDFS PATH ### -NULL -16306 384405526 -1645852809 NULL -16306.0 b5SoK8 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:11.105 true false -NULL -16307 559926362 -1645852809 NULL -16307.0 nA8bdtWfPPQyP2hL5 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.072 false false -NULL -16309 -826497289 -1645852809 NULL -16309.0 54o058c3mK6ewOQ5 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:10.761 false false -NULL -16310 206154150 1864027286 NULL -16310.0 5Hy1y6 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:00.821 false true -NULL -16379 -894716315
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index e8dc744..4f70c98 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -89,11 +89,11 @@ POSTHOOK: Lineage: small_alltypesorc4a.ctinyint SIMPLE [] PREHOOK: query: select * from small_alltypesorc1a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL @@ -102,11 +102,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc2a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -115,24 +115,33 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a -POSTHOOK: Output: hdfs://### HDFS PATH ### -NULL -16306 384405526 -1645852809 NULL -16306.0 b5SoK8 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:11.105 true false -NULL -16307 559926362 -1645852809 NULL -16307.0 nA8bdtWfPPQyP2hL5 xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:58.072 false false -NULL -16309 -826497289 -1645852809 NULL -16309.0 54o058c3mK6ewOQ5 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:10.761 false false -NULL -16310 206154150 1864027286 NULL -16310.0 5Hy1y6 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:00.821 false true -NULL -16379 -894716315 1864027286 NULL -16379.0 2ArdYqML3654nUjGJk3 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:47.059 true true +#### A masked pattern was here #### +NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false +NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false +NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false +NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false +NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false PREHOOK: query: select * from small_alltypesorc4a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_alltypesorc_a stored as orc as select * from (select * from (select * from small_alltypesorc1a) sq1 union all @@ -187,20 +196,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_alltypesorc_a PREHOOK: Output: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_alltypesorc_a POSTHOOK: Output: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL @@ -366,14 +375,14 @@ left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -8080 528534767 NULL -64.0 -8080.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 true NULL @@ -541,14 +550,14 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -64 -64 @@ -836,6 +845,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -922,7 +933,7 @@ left outer join small_alltypesorc_a hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -932,5 +943,5 @@ left outer join small_alltypesorc_a hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 145 -8960
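The last query in the vector_outer_join1.q.out hunks above, select count(*), sum(t1.c_ctinyint), is the shape that stays on the generic VectorGroupByOperator: two aggregation functions fail the "Single COUNT aggregation or Duplicate Reduction" check, exactly as the plan's nativeConditionsNotMet line reports. A hedged sketch of that boundary, with t and x as stand-in names:

-- Two aggregations: fails the single-COUNT condition, native: false.
select count(*), sum(x) from t;
-- Exactly one COUNT aggregation: eligible for the native path, per the
-- nativeConditionsMet lists printed throughout this patch.
select count(*) from t;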
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index 217217c..226473c 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -89,24 +89,33 @@ POSTHOOK: Lineage: small_alltypesorc4a_n0.ctinyint SIMPLE [(alltypesorc)alltypes PREHOOK: query: select * from small_alltypesorc1a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### --51 NULL -1064981602 -1444011153 -51.0 NULL aY3tpnr6wfvmWMG0U881 2Ol4N3Ha0815Ej54lA2N 1969-12-31 16:00:08.451 NULL false false --51 NULL -1065775394 -1331703092 -51.0 NULL aD88uS2N8DmqPlvjOa7F46i7 Ut8ka2o8iokF504065PYS 1969-12-31 16:00:08.451 NULL false true --51 NULL -1066684273 2034191923 -51.0 NULL 2W4Kg220OcCy065HG60k6e D7GOQhc3qbAR6 1969-12-31 16:00:08.451 NULL false false --51 NULL -1067683781 1750003656 -51.0 NULL IbgbUvP5 47x2I874 1969-12-31 16:00:08.451 NULL false true --51 NULL -1071480828 -1401575336 -51.0 NULL aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA 1969-12-31 16:00:08.451 NULL true true +#### A masked pattern was here #### +NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false +NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false +NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false +NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false +NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false PREHOOK: query: select * from small_alltypesorc2a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -115,24 +124,33 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### --64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL --64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL --64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL --64 -8080 528534767 NULL -64.0 -8080.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 true NULL --64 -9842 253665376 NULL -64.0 -9842.0 1cGVWH7n1QU NULL 1969-12-31 16:00:00.631 1969-12-31 16:00:01.781 true NULL +#### A masked pattern was here #### +NULL -13166 626923679 NULL NULL -13166.0 821UdmGbkEf4j NULL 1969-12-31 15:59:55.089 1969-12-31 16:00:15.69 true NULL +NULL -14426 626923679 NULL NULL -14426.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.505 1969-12-31 16:00:13.309 true NULL +NULL -14847 626923679 NULL NULL -14847.0 821UdmGbkEf4j NULL 1969-12-31 16:00:00.612 1969-12-31 15:59:43.704 true NULL +NULL -15632 528534767 NULL NULL -15632.0 cvLH6Eat2yFsyy7p NULL NULL 1969-12-31 15:59:53.593 true NULL +NULL -15830 253665376 NULL NULL -15830.0 1cGVWH7n1QU NULL 1969-12-31 16:00:02.582 1969-12-31 16:00:00.518 true NULL PREHOOK: query: select * from small_alltypesorc4a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -60 -200 NULL NULL -60.0 -200.0 NULL NULL 1969-12-31 16:00:11.996 1969-12-31 15:59:55.451 NULL NULL -61 -7196 NULL NULL -61.0 -7196.0 NULL 8Mlns2Tl6E0g 1969-12-31 15:59:44.823 1969-12-31 15:59:58.174 NULL false -61 -7196 NULL NULL -61.0 -7196.0 NULL fUJIN 1969-12-31 16:00:11.842 1969-12-31 15:59:58.174 NULL false @@ -192,25 +210,29 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_a_n0 COMPUTE STATISTICS FOR COLU PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_alltypesorc_a_n0 PREHOOK: Output: default@small_alltypesorc_a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a_n0 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_alltypesorc_a_n0 POSTHOOK: Output: default@small_alltypesorc_a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a_n0 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a_n0 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### --51 NULL -1064981602 -1444011153 -51.0 NULL aY3tpnr6wfvmWMG0U881 2Ol4N3Ha0815Ej54lA2N 1969-12-31 16:00:08.451 NULL false false --51 NULL -1065775394 -1331703092 -51.0 NULL aD88uS2N8DmqPlvjOa7F46i7 Ut8ka2o8iokF504065PYS 1969-12-31 16:00:08.451 NULL false true --51 NULL -1066684273 2034191923 -51.0 NULL 2W4Kg220OcCy065HG60k6e D7GOQhc3qbAR6 1969-12-31 16:00:08.451 NULL false false --51 NULL -1067683781 1750003656 -51.0 NULL IbgbUvP5 47x2I874 1969-12-31 16:00:08.451 NULL false true --51 NULL -1071480828 -1401575336 -51.0 NULL aw724t8c5558x2xneC624 4uE7l74tESBiKfu7c8wM7GA 1969-12-31 16:00:08.451 NULL true true +#### A masked pattern was here #### -60 -200 NULL NULL -60.0 -200.0 NULL NULL 1969-12-31 16:00:11.996 1969-12-31 15:59:55.451 NULL NULL -61 -7196 NULL NULL -61.0 -7196.0 NULL 8Mlns2Tl6E0g 1969-12-31 15:59:44.823 1969-12-31 15:59:58.174 NULL false -61 -7196 NULL NULL -61.0 -7196.0 NULL fUJIN 1969-12-31 16:00:11.842 1969-12-31 15:59:58.174 NULL false @@ -408,6 +430,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -494,7 +518,7 @@ left outer join small_alltypesorc_a_n0 hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n0 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a_n0 c left outer join small_alltypesorc_a_n0 cd @@ -504,5 +528,10 @@ left outer join small_alltypesorc_a_n0 hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n0 -POSTHOOK: Output: hdfs://### HDFS PATH ### -24 -3110813706 +#### A masked pattern was here #### +34 -26289186744
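The expected result for the vector_outer_join2.q.out aggregation changes here from 24 -3110813706 to 34 -26289186744, consistent with the regenerated small_alltypesorc* row sets earlier in the file. The create statement for the union-all table appears only truncated in these hunks; its general shape, with the remaining branches assumed to repeat the same pattern, would be:

-- Sketch of the truncated DDL above; branch count and aliases are assumed.
create table small_alltypesorc_a_n0 stored as orc as
select * from (
  select * from (select * from small_alltypesorc1a_n0) sq1
  union all
  select * from (select * from small_alltypesorc2a_n0) sq2
) t;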
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out index e742ff3..05b2873 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out @@ -89,24 +89,33 @@ POSTHOOK: Lineage: small_alltypesorc4a_n1.ctinyint SIMPLE [(alltypesorc)alltypes PREHOOK: query: select * from small_alltypesorc1a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### --64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL --64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL --64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL --64 -8080 528534767 NULL -64.0 -8080.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 true NULL --64 -9842 253665376 NULL -64.0 -9842.0 1cGVWH7n1QU NULL 1969-12-31 16:00:00.631 1969-12-31 16:00:01.781 true NULL +#### A masked pattern was here #### +NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false +NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false +NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false +NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false +NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false PREHOOK: query: select * from small_alltypesorc2a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -51 NULL NULL -1731061911 -51.0 NULL Pw53BBJ yL443x2437PO5Hv1U3lCjq2D 1969-12-31 16:00:08.451 NULL true false -51 NULL NULL -1846191223 -51.0 NULL Ul085f84S33Xd32u x1JC58g0Ukp 1969-12-31 16:00:08.451 NULL true true -51 NULL NULL -1874052220 -51.0 NULL c61B47I604gymFJ sjWQS78 1969-12-31 16:00:08.451 NULL false false @@ -115,11 +124,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -51 NULL -31312632 1086455747 -51.0 NULL NULL Bc7xt12568c451o64LF5 1969-12-31 16:00:08.451 NULL NULL true -51 NULL -337975743 608681041 -51.0 NULL NULL Ih2r28o6 1969-12-31 16:00:08.451 NULL NULL true -51 NULL -413196097 -306198070 -51.0 NULL NULL F53QcSDPpxYF1Ub 1969-12-31 16:00:08.451 NULL NULL false @@ -128,11 +137,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc4a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -192,20 +201,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_a_n1 COMPUTE STATISTICS FOR COLU PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_alltypesorc_a_n1 PREHOOK: Output: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a_n1 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_alltypesorc_a_n1 POSTHOOK: Output: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a_n1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a_n1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -51 NULL -31312632 1086455747 -51.0 NULL NULL Bc7xt12568c451o64LF5 1969-12-31 16:00:08.451 NULL NULL true -51 NULL -337975743 608681041 -51.0 NULL NULL Ih2r28o6 1969-12-31 16:00:08.451 NULL NULL true -51 NULL -413196097 -306198070 -51.0 NULL NULL F53QcSDPpxYF1Ub 1969-12-31 16:00:08.451 NULL NULL false @@ -254,7 +263,7 @@ left outer join small_alltypesorc_a_n1 hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -264,8 +273,13 @@ left outer join small_alltypesorc_a_n1 hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### -32 +#### A masked pattern was here #### +20 PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c @@ -294,7 +308,7 @@ left outer join small_alltypesorc_a_n1 hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -304,8 +318,13 @@ left outer join small_alltypesorc_a_n1 hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### -24 +#### A masked pattern was here #### +28 PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c @@ -334,7 +353,7 @@ left outer join small_alltypesorc_a_n1 hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a_n1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -344,5 +363,10 @@ left outer join small_alltypesorc_a_n1 hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 -POSTHOOK: Output: hdfs://### HDFS PATH ### -24 +#### A masked pattern was here #### +28
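Across these result files the specialized class names encode the hash-key type and the aggregation shape. A summary sketch inferred only from the plans in this patch, with t, k, s, c as stand-in names:

-- count(*) with no group-by keys    -> VectorGroupByHashMultiKeySingleCountStarOperator
select count(*) from t;
-- count(c) with no group-by keys    -> VectorGroupByHashMultiKeySingleCountColumnOperator
select count(c) from t;
-- single long key, no aggregations  -> VectorGroupByHashLongKeyDuplicateReductionOperator
select k from t group by k;
-- single string key, no aggregations -> VectorGroupByHashStringKeyDuplicateReductionOperator
select s from t group by s;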
diff --git ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out index 70f7401..ef8272f 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out @@ -89,11 +89,11 @@ POSTHOOK: Lineage: small_alltypesorc4b.ctinyint SIMPLE [] PREHOOK: query: select * from small_alltypesorc1b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -3097 253665376 NULL -64.0 -3097.0 1cGVWH7n1QU NULL 1969-12-31 16:00:00.013 1969-12-31 16:00:06.097 true NULL @@ -107,11 +107,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc2b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -200 NULL -1809444706 -64.0 -200.0 NULL B87YVb3UASqg 1969-12-31 16:00:10.858 1969-12-31 15:59:55.451 NULL true -64 -200 NULL 2118653994 -64.0 -200.0 NULL ONHGSDy1U4Ft431DfQp15 1969-12-31 16:00:03.944 1969-12-31 15:59:55.451 NULL true -64 -200 NULL 927647669 -64.0 -200.0 NULL DhxkBT 1969-12-31 16:00:00.199 1969-12-31 15:59:55.451 NULL false @@ -125,10 +125,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3b -POSTHOOK: Output: hdfs://### HDFS PATH ### -NULL -16269 -378213344 -1645852809 NULL -16269.0 sOdj1Tmvbl03f xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:15.867 false false -NULL -16274 -671342269 -1645852809 NULL -16274.0 3DE7EQo4KyT0hS xH7445Rals48VOulSyR5F NULL 1969-12-31 15:59:51.469 false false @@ -140,14 +141,27 @@ NULL -16307 559926362 -1645852809 NULL -16307.0 nA8bdtWfPPQyP2hL5 xH7445Rals48VO -NULL -16309 -826497289 -1645852809 NULL -16309.0 54o058c3mK6ewOQ5 xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:10.761 false false -NULL -16310 206154150 1864027286 NULL -16310.0 5Hy1y6 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:00.821 false true -NULL -16379 -894716315 1864027286 NULL -16379.0 2ArdYqML3654nUjGJk3 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:47.059 true true +#### A masked pattern was here #### +NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false +NULL NULL -609074876 -1887561756 NULL NULL EcM71 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:55.061 true false +NULL NULL -700300206 -1887561756 NULL NULL kdqQE010 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:58.384 false false +NULL NULL -726473298 1864027286 NULL NULL OFy1a1xf37f75b5N 4KWs6gw7lv2WYd66P NULL 1969-12-31 16:00:11.799 true true +NULL NULL -738747840 -1645852809 NULL NULL vmAT10eeE47fgH20pLi xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:11.55 true false +NULL NULL -838810013 1864027286 NULL NULL N016jPED08o 4KWs6gw7lv2WYd66P NULL 1969-12-31 15:59:44.252 false true +NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false +NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false +NULL NULL -899422227 -1645852809 NULL NULL 73xdw4X xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:07.395 false false +NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 1969-12-31 16:00:05.43 false false PREHOOK: query: select * from small_alltypesorc4b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_alltypesorc_b stored as orc as select * from (select * from (select * from small_alltypesorc1b) sq1 union all @@ -202,20 +216,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_alltypesorc_b PREHOOK: Output: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_alltypesorc_b POSTHOOK: Output: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -200 NULL -1809444706 -64.0 -200.0 NULL B87YVb3UASqg 1969-12-31 16:00:10.858 1969-12-31 15:59:55.451 NULL true @@ -265,14 +279,14 @@ left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -3586 626923679 NULL 
-64.0 -3586.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.952 1969-12-31 15:59:51.131 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -4018 626923679 NULL -64.0 -4018.0 821UdmGbkEf4j NULL 1969-12-31 15:59:58.959 1969-12-31 16:00:07.803 true NULL @@ -346,14 +360,14 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -64 -64 @@ -792,7 +806,7 @@ left outer join small_alltypesorc_b hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -802,5 +816,5 @@ left outer join small_alltypesorc_b hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 890 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out index 6ad6a7b..12dade0 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out @@ -28,12 +28,12 @@ PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@sorted_mod_4 PREHOOK: Output: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@sorted_mod_4 POSTHOOK: Output: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_table stored as orc as select ctinyint, cbigint from alltypesorc limit 100 PREHOOK: type: CREATETABLE_AS_SELECT @@ -60,12 +60,12 @@ PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_table PREHOOK: Output: default@small_table -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_table POSTHOOK: Output: default@small_table -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from sorted_mod_4 s @@ -89,7 +89,7 @@ on s.ctinyint = st.ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st @@ -98,7 +98,7 @@ on s.ctinyint = st.ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS 
PATH ### +#### A masked pattern was here #### 6876 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint @@ -123,7 +123,7 @@ on s.ctinyint = sm.ctinyint and s.cmodint = 2 PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -132,7 +132,7 @@ on s.ctinyint = sm.ctinyint and s.cmodint = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6058 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint @@ -157,7 +157,7 @@ on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -166,7 +166,7 @@ on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6248 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint @@ -191,7 +191,7 @@ on s.ctinyint = sm.ctinyint and s.ctinyint < 100 PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -200,7 +200,7 @@ on s.ctinyint = sm.ctinyint and s.ctinyint < 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6876 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* @@ -231,7 +231,7 @@ left outer join sorted_mod_4 s2 PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -242,7 +242,7 @@ left outer join sorted_mod_4 s2 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3268334 PREHOOK: query: create table mod_8_mod_4 stored as orc as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc @@ -272,12 +272,12 @@ PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Output: default@mod_8_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: 
Output: default@mod_8_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_table2 stored as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100 PREHOOK: type: CREATETABLE_AS_SELECT @@ -304,12 +304,12 @@ PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@small_table2 PREHOOK: Output: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@small_table2 POSTHOOK: Output: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s @@ -333,7 +333,7 @@ on s.cmodtinyint = st.cmodtinyint PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -342,7 +342,7 @@ on s.cmodtinyint = st.cmodtinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 39112 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint @@ -367,7 +367,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -376,7 +376,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 11171 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint @@ -401,7 +401,7 @@ on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -410,7 +410,7 @@ on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 14371 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint @@ -435,7 +435,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer 
join small_table2 sm @@ -444,7 +444,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 17792 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* @@ -475,7 +475,7 @@ left outer join mod_8_mod_4 s2 PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -486,5 +486,5 @@ left outer join mod_8_mod_4 s2 POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6524438 diff --git ql/src/test/results/clientpositive/spark/vector_string_concat.q.out ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index 2946a02..3455d9a 100644 --- ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -352,10 +352,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 20:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -379,7 +380,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index 6b09206..f128447 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -53,6 +53,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -231,6 +233,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -556,6 +560,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -734,6 +740,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1059,6 +1067,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1237,6 +1247,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1609,6 +1621,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_1.q.out ql/src/test/results/clientpositive/spark/vectorization_1.q.out index e933da7..76a8c29 100644 --- ql/src/test/results/clientpositive/spark/vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_1.q.out @@ -87,6 +87,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_12.q.out ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 53902c4..304e00f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -111,6 +111,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or 
Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index a49738e..daa93a1 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -113,6 +113,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -465,6 +467,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 96e17a9..19f5539 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -113,6 +113,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 1cea297..22fce8f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -109,6 +109,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), 
_col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index adb0491..2a7221f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_2.q.out ql/src/test/results/clientpositive/spark/vectorization_2.q.out index 641e61e..a4440e7 100644 --- ql/src/test/results/clientpositive/spark/vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_2.q.out @@ -91,6 +91,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_3.q.out ql/src/test/results/clientpositive/spark/vectorization_3.q.out index 1baaaf6..f0c573d 100644 --- ql/src/test/results/clientpositive/spark/vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_3.q.out @@ -96,6 +96,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_4.q.out ql/src/test/results/clientpositive/spark/vectorization_4.q.out index c67b97f..7739fc5 100644 --- ql/src/test/results/clientpositive/spark/vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_4.q.out @@ -91,6 +91,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_5.q.out ql/src/test/results/clientpositive/spark/vectorization_5.q.out index da9a4d3..cc39474 100644 --- ql/src/test/results/clientpositive/spark/vectorization_5.q.out +++ 
ql/src/test/results/clientpositive/spark/vectorization_5.q.out @@ -84,6 +84,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index adb0491..2a7221f 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out index 40e13bb..0603ca7 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -43,6 +43,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out index 5ee04f4..b52166b 100644 --- ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out @@ -232,7 +232,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -321,7 +321,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -521,7 +521,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -607,7 +607,7 @@ STAGE PLANS: enabled: true inputFormatFeatureSupport: [] featureSupportInUse: [] - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out 
ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 3844c79..8d792cf 100644 --- ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -118,6 +118,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -379,6 +381,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] mode: hash @@ -632,6 +636,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash @@ -864,6 +870,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash @@ -2194,6 +2202,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:smallint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col0 (type: smallint) @@ -2470,6 +2480,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: _col0 (type: double) @@ -2790,6 +2802,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:timestamp, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: timestamp), _col1 (type: string) @@ -3191,6 +3205,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: boolean) @@ -3423,10 +3439,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3447,7 +3463,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3536,10 +3552,10 @@ STAGE PLANS: Group By Operator aggregations: count(i) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3560,7 +3576,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3721,10 +3737,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3745,7 +3761,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3834,10 +3850,10 @@ STAGE PLANS: Group By Operator aggregations: count(ctinyint) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:tinyint) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true 
+ nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3858,7 +3874,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3947,10 +3963,10 @@ STAGE PLANS: Group By Operator aggregations: count(cint) Group By Vectorization: - aggregators: VectorUDAFCount(col 2:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3971,7 +3987,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4060,10 +4076,10 @@ STAGE PLANS: Group By Operator aggregations: count(cfloat) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:float) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4084,7 +4100,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4173,10 +4189,10 @@ STAGE PLANS: Group By Operator aggregations: count(cstring1) Group By Vectorization: - aggregators: VectorUDAFCount(col 6:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4197,7 +4213,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4286,10 +4302,10 @@ STAGE PLANS: Group By Operator aggregations: count(cboolean1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:boolean) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + 
native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4310,7 +4326,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index fdf337d..e3fcef1 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -123,6 +123,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index e6595f9..618f798 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -3698,6 +3698,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.vectorized.execution.groupby.native.enabled IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) diff --git ql/src/test/results/clientpositive/vector_aggregate_9.q.out ql/src/test/results/clientpositive/vector_aggregate_9.q.out index 198f688..dbe6d41 100644 --- ql/src/test/results/clientpositive/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_9.q.out @@ -140,6 +140,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -246,6 +248,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -352,6 +356,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out index d245680..92544aa 100644 --- ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out @@ -80,6 +80,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index 7c550ba..2e13fcf 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -195,6 +195,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -345,6 +347,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:binary native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bin (type: binary) diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out index 68a1aea..9926b59 100644 --- ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -143,6 +143,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out index dc2c1e4..662852f 100644 --- ql/src/test/results/clientpositive/vector_char_2.q.out +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -104,6 +104,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction 
IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) @@ -292,6 +294,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out index 918ac59..01c02d8 100644 --- ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -267,6 +269,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_data_types.q.out ql/src/test/results/clientpositive/vector_data_types.q.out index a971eed..bb15803 100644 --- ql/src/test/results/clientpositive/vector_data_types.q.out +++ ql/src/test/results/clientpositive/vector_data_types.q.out @@ -345,6 +345,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index b9f4444..1f3a1a1 100644 --- ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -82,6 +82,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -224,6 +226,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or 
Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -399,6 +403,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -560,6 +566,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/vector_decimal_precision.q.out ql/src/test/results/clientpositive/vector_decimal_precision.q.out index a530b3b..c4b8b19 100644 --- ql/src/test/results/clientpositive/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/vector_decimal_precision.q.out @@ -586,6 +586,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -1171,6 +1173,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_delete_orig_table.q.out ql/src/test/results/clientpositive/vector_delete_orig_table.q.out index 60d3c4d..969f2e0 100644 --- ql/src/test/results/clientpositive/vector_delete_orig_table.q.out +++ ql/src/test/results/clientpositive/vector_delete_orig_table.q.out @@ -69,6 +69,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_distinct_2.q.out ql/src/test/results/clientpositive/vector_distinct_2.q.out index 8eefb3d..7068fd0 100644 --- ql/src/test/results/clientpositive/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/vector_distinct_2.q.out @@ -138,6 +138,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction 
IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out index 388b775..6dd4ea1 100644 --- ql/src/test/results/clientpositive/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -43,6 +43,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -186,6 +188,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -337,6 +341,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -488,6 +494,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) diff --git ql/src/test/results/clientpositive/vector_groupby_3.q.out ql/src/test/results/clientpositive/vector_groupby_3.q.out index 173f84f..110264a 100644 --- ql/src/test/results/clientpositive/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/vector_groupby_3.q.out @@ -140,6 +140,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 4be1272..6b7c4ad 100644 --- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -53,6 +53,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -336,6 +338,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_multikey.q.out ql/src/test/results/clientpositive/vector_groupby_multikey.q.out new file mode 100644 index 0000000..d8d602a --- /dev/null +++ ql/src/test/results/clientpositive/vector_groupby_multikey.q.out @@ -0,0 +1,2351 @@ +PREHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a.key1 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 EXPRESSION [] +POSTHOOK: Lineage: groupby_multi_1a.key1 EXPRESSION [] +PREHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a 
+POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: Lineage: groupby_multi_1a_nonull.key0 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a_nonull.key1 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: explain vectorization operator +select key0, key1, count(*) from 
groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +NULL NULL 2 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator 
+ groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +NULL NULL +PREHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 
+1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_multi_1a_nonull + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: 
select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo 
order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string), ts (type: timestamp) + outputColumnNames: bo, s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: 
boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string) + outputColumnNames: bo, s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + 
Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), d (type: double), ts (type: timestamp) + outputColumnNames: si, d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No 
PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor 
Tree: + ListSink + +PREHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), ts (type: timestamp) + outputColumnNames: si, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select `dec`, 
bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: f (type: float), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: f, dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(f) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + 
native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: si, i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(si) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 
(type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 +65536 4294967307 +65536 4294967308 +65536 4294967312 +65536 4294967317 +65536 4294967320 +65536 4294967326 +65536 4294967334 +65536 4294967336 +65536 4294967338 diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index 9c81fbf..e8b6da8 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -266,6 +266,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -458,6 +460,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -735,6 +739,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_item_sk (type: int) @@ -933,6 +939,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) diff --git ql/src/test/results/clientpositive/vector_groupby_singlekey.q.out ql/src/test/results/clientpositive/vector_groupby_singlekey.q.out new file mode 100644 index 0000000..3ef66a4 --- /dev/null +++ ql/src/test/results/clientpositive/vector_groupby_singlekey.q.out @@ -0,0 +1,11313 @@ +PREHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SIMPLE [(groupby_long_1a_txt)groupby_long_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table 
+POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SIMPLE [(groupby_long_1a_nonull_txt)groupby_long_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a_nonull values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@groupby_long_1b_txt +PREHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SIMPLE [(groupby_long_1b_txt)groupby_long_1b_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1b values (32030) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (32030) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SIMPLE [(groupby_long_1b_nonull_txt)groupby_long_1b_nonull_txt.FieldSchema(name:key, 
type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b_nonull values (31713) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (31713) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b_nonull values (34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c.key SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] 
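Every fixture in this file is built the same way: a comma-delimited text staging table is created, populated with LOAD DATA LOCAL INPATH, copied into an ORC table via CTAS, and then padded with a few single-row INSERTs so the ORC table carries extra distinct values (and, for the nullable variants, NULL keys) beyond the loaded text data. A minimal sketch of that pattern, using the hypothetical names groupby_example_txt/groupby_example rather than any fixture above:

    -- Staging table over a comma-delimited text file (hypothetical name).
    CREATE TABLE groupby_example_txt (key BIGINT)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

    LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt'
    OVERWRITE INTO TABLE groupby_example_txt;

    -- Copy into ORC so the vectorized input format is exercised.
    CREATE TABLE groupby_example STORED AS ORC
    AS SELECT * FROM groupby_example_txt;

    -- Single-row inserts add further ORC files, a NULL key, and a
    -- value (800) that does not appear in the loaded text data.
    INSERT INTO groupby_example VALUES (NULL);
    INSERT INTO groupby_example VALUES (800);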
+PREHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: Lineage: groupby_long_1c_nonull.b_string SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c_nonull.key SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY 
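In the plan below, and in every EXPLAIN VECTORIZATION OPERATOR output that follows, the Group By Vectorization block reports nativeConditionsMet and nativeConditionsNotMet. The operator stays on the non-native path (native: false) throughout this file only because the qtest runs on plain MapReduce, so the "hive.execution.engine mr IN [tez, spark]" condition fails even when every other condition holds. A hedged sketch of how the same switch could be flipped in a session to compare plans (the property name is taken from the conditions printed below; forcing it off is an assumption about intended usage, not something this file demonstrates):

    -- Disable the native GroupBy path, then re-examine the plan.
    SET hive.vectorized.execution.groupby.native.enabled=false;
    EXPLAIN VECTORIZATION OPERATOR
    SELECT key, COUNT(key) FROM groupby_long_1a GROUP BY key;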
+PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 
+1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 48 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num 
rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +NULL +PREHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a_nonull + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: 
bigint) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +1569543799237464101 +3313583664488247651 +968819023021777205 +PREHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +3313583664488247651 +968819023021777205 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE 
Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +NULL +PREHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + 
enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b_nonull + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce 
partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + 
Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + 
enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: key, b_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b_string) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +9999 1 +NULL 3 +PREHOOK: query: select key, 
count(b_string) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +9999 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1035 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + 
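
[Editor's note] The three result sets above for groupby_long_1c walk through the standard COUNT null semantics that the vectorized GroupBy path has to preserve: count(key) skips NULL keys (the NULL group reports 0), count(*) counts rows (the NULL group reports 4), and count(b_string) skips only NULL b_string values (the NULL group reports 3). A minimal sketch of the same behavior, using a hypothetical table t that is not part of this patch:

-- Hypothetical table illustrating the COUNT variants exercised above.
CREATE TABLE t (k INT, v STRING);
INSERT INTO t VALUES (1, 'a'), (1, NULL), (NULL, 'b'), (NULL, NULL);

SELECT k, count(*), count(k), count(v) FROM t GROUP BY k;
-- k = 1:    count(*) = 2, count(k) = 2, count(v) = 1
-- k = NULL: count(*) = 2, count(k) = 0, count(v) = 1

-- NULL != 1 evaluates to NULL, so the WHERE clause drops the NULL-key
-- group before aggregation, exactly as in the filtered queries above:
SELECT k, count(*) FROM t WHERE k != 1 GROUP BY k;
-- (no rows: k = 1 fails the predicate, k = NULL yields NULL)
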
Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 517 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +NULL +PREHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull 
group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c_nonull + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +PREHOOK: query: select key from groupby_long_1c_nonull where 
key != -1437463633 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 +1928928239 +PREHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: insert into groupby_decimal64_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1a values (333.33) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (333.33) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@groupby_decimal64_1a_nonull +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp EXPRESSION [] +POSTHOOK: Lineage: groupby_decimal64_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key 
decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 
1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,3)/DECIMAL_64) -> 2:decimal(6,3) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: 
[decimal(6,3)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(6,3)] + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +NULL +PREHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked 
pattern was here #### +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a_nonull + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + 
groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,3)/DECIMAL_64) -> 2:decimal(6,3) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,3)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(6,3)] + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(6,3) + partitionColumnCount: 0 + 
scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1:decimal(8,2)/DECIMAL_64) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + 
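
[Editor's note] Every Group By Vectorization block in this file reports native: false for the single reason shown in the line above: this qtest runs on plain MapReduce, and the native path is gated on Tez or Spark. The remaining conditions listed under nativeConditionsMet (the hive.vectorized.execution.groupby.native.enabled flag, a single COUNT aggregation or duplicate reduction, HASH group-by mode, no grouping sets) are all satisfied. A sketch of the settings a Tez-based run of the same query might use; the set statements are assumptions, not part of this qtest:

-- Assumed settings for exercising the native path (not taken from this file):
set hive.execution.engine=tez;                              -- clears the only failed condition
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.groupby.native.enabled=true;  -- the flag checked in nativeConditionsMet

explain vectorization operator
select key, count(key) from groupby_decimal64_1b group by key;
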
vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key 
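
[Editor's note] The decimal plans above also show the DECIMAL_64 handling: decimal(8,2) has precision at most 18, so the vectorized reader keeps it as a scaled 64-bit long (key:decimal(8,2)/DECIMAL_64), and the group-by key is converted back to a full decimal via the ConvertDecimal64ToDecimal scratch-column expression before hashing, which is why scratchColumnTypeNames lists an extra decimal(8,2) column. A type too wide for a 64-bit long would skip that representation; the table below is a hypothetical illustration, not part of the test:

-- Hypothetical: precision 20 exceeds the DECIMAL_64 limit of 18, so the
-- reader would use the full HiveDecimal representation and the plan would
-- show no ConvertDecimal64ToDecimal key expression.
CREATE TABLE groupby_decimal_wide (key decimal(20,2))
row format delimited fields terminated by ',';

explain vectorization detail
select key, count(*) from groupby_decimal_wide group by key;
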
+PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: c_timestamp (type: timestamp), key (type: decimal(8,2)) + outputColumnNames: c_timestamp, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_timestamp) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:timestamp) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: 
false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: 
Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(8,2)] + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(8,2)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +NULL +PREHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 
2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b_nonull + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: 
true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(8,2)] + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column 
stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(8,2)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: 
default@groupby_string_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SIMPLE [(groupby_string_1a_txt)groupby_string_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SIMPLE [(groupby_string_1a_nonull_txt)groupby_string_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a_nonull values 
('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b +POSTHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b +POSTHOOK: Lineage: groupby_string_1b.key SIMPLE [(groupby_string_1b_txt)groupby_string_1b_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: 
Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SIMPLE [(groupby_string_1b_nonull_txt)groupby_string_1b_nonull_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1b_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO 
TABLE groupby_string_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_date SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') 
+PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### 
+PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into 
groupby_string_1c_nonull values ('EEE', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 0 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By 
Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 3 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: 
Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1412 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 668 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +NULL +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +QNCYBDW +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 
+UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a_nonull + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +MXGDMBD +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 0 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + 
Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +MXGD +NULL +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b_nonull + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +MXGD +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + 
native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 0 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) 
from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 6 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date) + outputColumnNames: key, s_date + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_date) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 5 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_timestamp (type: timestamp) + outputColumnNames: key, s_timestamp + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: count(s_timestamp) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 4 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 
1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7599 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3718 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +NULL +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was 
here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 
1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c_nonull + Statistics: Num rows: 41 Data size: 7144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 7144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 41 Data size: 7144 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + 
Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 20 Data size: 3484 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3484 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 3484 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated 
by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a +POSTHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a +POSTHOOK: Lineage: groupby_serialize_1a.key SIMPLE [(groupby_serialize_1a_txt)groupby_serialize_1a_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: Lineage: groupby_serialize_1a_nonull.key SIMPLE [(groupby_serialize_1a_nonull_txt)groupby_serialize_1a_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b +POSTHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b +POSTHOOK: Lineage: groupby_serialize_1b.c_double SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_smallint SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_string SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.key SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: query: CREATE TABLE 
groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_double SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_smallint SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_string SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.key SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine 
mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by 
key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +NULL +PREHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern 
was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a_nonull + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + 
sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### 
+POSTHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: 
[DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 
+2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map 
Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_smallint (type: smallint) + outputColumnNames: key, c_smallint + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_smallint) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 
06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_string (type: string) + outputColumnNames: key, c_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_string) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping 
Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 
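The golden results above exercise every COUNT flavor the native vectorized GroupBy must handle: count(*) counts all rows in a group (so a NULL key forms its own group with a nonzero count), while count(key) and count(c_smallint)/count(c_string) skip NULL values and report 0 for groups whose values are all NULL. A minimal HiveQL sketch of that distinction, against a hypothetical table t used for illustration only:

-- Hypothetical table: t(key timestamp, val string).
-- Suppose the NULL-key group contains two rows whose val is also NULL.
SELECT key, count(*), count(key), count(val)
FROM t
GROUP BY key;
-- For the NULL-key group this yields: NULL  2  0  0
-- count(*) counts rows; count(key) and count(val) count only non-NULL values,
-- which is exactly the difference visible in the NULL rows of the results above.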
+PREHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: 
COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 
+2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +NULL +PREHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 
03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from 
groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked 
pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:36.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b_nonull + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 
06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:36.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k 
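Every VectorGroupByOperator in the over10k plans below prints native: false for the same single reason: the only unmet condition is "hive.execution.engine mr IN [tez, spark]", while the GroupBy-specific conditions ("hive.vectorized.execution.groupby.native.enabled IS true", "Single COUNT aggregation or Duplicate Reduction", HASH mode, no grouping sets) are all met. A sketch of the session state those condition lists imply; the SET statements are illustrative and not part of the qtest itself:

    -- Conditions the plans below report as met:
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.groupby.native.enabled=true;
    -- The one condition reported as not met: this qtest runs on MR, so
    -- "hive.execution.engine mr IN [tez, spark]" is false. On tez or spark,
    -- with the same queries, the operator would be expected to go native.
    SET hive.execution.engine=tez;
    EXPLAIN VECTORIZATION OPERATOR
    SELECT s, COUNT(s) FROM over10k GROUP BY s ORDER BY s LIMIT 10;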
+PREHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string), ts (type: timestamp) + outputColumnNames: s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group 
By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number 
of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by 
ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: d (type: double), ts (type: timestamp) + outputColumnNames: d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num 
rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + 
inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 
+2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(dec) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key 
expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(bin) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), 
VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize 
IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY 
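(A minimal q-file sketch for context -- assumed session settings, not lines from this patch: as the plans above and below show, under MR the Group By Vectorization still reports native: false, because the condition "hive.execution.engine mr IN [tez, spark]" is not met even when hive.vectorized.execution.groupby.native.enabled is true.)

set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.groupby.native.enabled=true;
explain vectorization operator
select i, count(i) from over10k group by i order by i limit 10;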
+POSTHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(i) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: 
KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: 
NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 +65537 +65538 +65539 +65540 +65541 +65542 +65543 +65544 +65545 diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out index 5113966..fb25b98 100644 --- ql/src/test/results/clientpositive/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out @@ -164,6 +164,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: s_store_id (type: string), 0L (type: bigint) @@ -275,6 +277,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/vector_include_no_sel.q.out ql/src/test/results/clientpositive/vector_include_no_sel.q.out index 0ecc7af..ba4b882 100644 --- ql/src/test/results/clientpositive/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/vector_include_no_sel.q.out @@ -241,6 +241,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_orderby_5.q.out ql/src/test/results/clientpositive/vector_orderby_5.q.out index 734c6a9..281a866 100644 --- 
ql/src/test/results/clientpositive/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/vector_orderby_5.q.out @@ -141,6 +141,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out index 2026d59..ca29a2b 100644 --- ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -703,6 +703,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out index caf51a1..79b7a05 100644 --- ql/src/test/results/clientpositive/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_outer_join2.q.out @@ -343,6 +343,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_outer_join3.q.out ql/src/test/results/clientpositive/vector_outer_join3.q.out index 6cae357..778a4d8 100644 --- ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -244,7 +244,7 @@ left outer join small_alltypesorc_a_n1 hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select
Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4493 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS 
false"]},"Statistics:":"Num rows: 24 Data size: 4942 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +======= +{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT 
STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join 
Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +>>>>>>> 1a04fe1... more PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -284,7 +288,11 @@ left outer join small_alltypesorc_a_n1 hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY +<<<<<<< HEAD {"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select 
Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4085 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4493 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 4942 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 
7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +======= +{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator 
Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink 
Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +>>>>>>> 1a04fe1... 
PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -324,7 +332,11 @@ left outer join small_alltypesorc_a_n1 hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY
+{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cbigint`, `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cbigint`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t0` ON `t`.`cstring2` = `t0`.`cstring2` AND `t`.`cbigint` = `t0`.`cbigint`\nLEFT JOIN (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t1` ON `t`.`cstring1` = `t1`.`cstring1` AND `t`.`cint` = `t1`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats:
NONE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By 
Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +>>>>>>> 1a04fe1... 
PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index adbb759..3d86d2d 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -782,7 +782,11 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY
+{"optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `ctinyint`, `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t0` ON `t`.`cint` = `t0`.`cint`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t1` ON `t`.`ctinyint` = `t1`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data
size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for 
keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +>>>>>>> 1a04fe1... 
PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out index 750ef5c..5a17728 100644 --- ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out +++ ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out @@ -98,6 +98,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -243,6 +245,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out index 1632b31..faed5fe 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out @@ -62,6 +62,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out index a735338..3c8a01c 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out @@ -99,6 +99,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/vector_string_concat.q.out ql/src/test/results/clientpositive/vector_string_concat.q.out index c807d16..82e94d5 100644 --- ql/src/test/results/clientpositive/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/vector_string_concat.q.out @@ -348,6 +348,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 20:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT
aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_topnkey.q.out ql/src/test/results/clientpositive/vector_topnkey.q.out index ed829e2..a3b4559 100644 --- ql/src/test/results/clientpositive/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/vector_topnkey.q.out @@ -40,6 +40,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -204,6 +206,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out index 2cdbe38..ca8d9f6 100644 --- ql/src/test/results/clientpositive/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/vector_when_case_null.q.out @@ -56,6 +56,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/vectorization_1.q.out ql/src/test/results/clientpositive/vectorization_1.q.out index c82252f..6677e53 100644 --- ql/src/test/results/clientpositive/vectorization_1.q.out +++ ql/src/test/results/clientpositive/vectorization_1.q.out @@ -82,6 +82,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_12.q.out ql/src/test/results/clientpositive/vectorization_12.q.out index 50eddc1..ade85bd 100644 --- ql/src/test/results/clientpositive/vectorization_12.q.out +++ ql/src/test/results/clientpositive/vectorization_12.q.out @@ -106,6 +106,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation 
or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/vectorization_13.q.out ql/src/test/results/clientpositive/vectorization_13.q.out index d2f3448..9f1cb16 100644 --- ql/src/test/results/clientpositive/vectorization_13.q.out +++ ql/src/test/results/clientpositive/vectorization_13.q.out @@ -108,6 +108,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -439,6 +441,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out index b1241d0..a11d200 100644 --- ql/src/test/results/clientpositive/vectorization_14.q.out +++ ql/src/test/results/clientpositive/vectorization_14.q.out @@ -108,6 +108,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out index 24b9a0b..a9000bec 100644 --- ql/src/test/results/clientpositive/vectorization_15.q.out +++ ql/src/test/results/clientpositive/vectorization_15.q.out @@ -104,6 +104,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 
(type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/vectorization_16.q.out ql/src/test/results/clientpositive/vectorization_16.q.out index f26ce5e..13a6ea7 100644 --- ql/src/test/results/clientpositive/vectorization_16.q.out +++ ql/src/test/results/clientpositive/vectorization_16.q.out @@ -81,6 +81,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/vectorization_2.q.out ql/src/test/results/clientpositive/vectorization_2.q.out index fdf9d3b..25766b6 100644 --- ql/src/test/results/clientpositive/vectorization_2.q.out +++ ql/src/test/results/clientpositive/vectorization_2.q.out @@ -86,6 +86,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_3.q.out ql/src/test/results/clientpositive/vectorization_3.q.out index db7d7ba..94fd728 100644 --- ql/src/test/results/clientpositive/vectorization_3.q.out +++ ql/src/test/results/clientpositive/vectorization_3.q.out @@ -91,6 +91,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_4.q.out ql/src/test/results/clientpositive/vectorization_4.q.out index cd62953..e5dc54d 100644 --- ql/src/test/results/clientpositive/vectorization_4.q.out +++ ql/src/test/results/clientpositive/vectorization_4.q.out @@ -86,6 +86,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_5.q.out ql/src/test/results/clientpositive/vectorization_5.q.out index 1396fdc..0b9e03f 100644 --- ql/src/test/results/clientpositive/vectorization_5.q.out +++ ql/src/test/results/clientpositive/vectorization_5.q.out @@ -79,6 +79,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode 
HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/vectorization_9.q.out ql/src/test/results/clientpositive/vectorization_9.q.out index f26ce5e..13a6ea7 100644 --- ql/src/test/results/clientpositive/vectorization_9.q.out +++ ql/src/test/results/clientpositive/vectorization_9.q.out @@ -81,6 +81,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index a834620..d266938 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -247,6 +247,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -427,6 +429,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -732,6 +736,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/vectorization_nested_udf.q.out index b4fe31a..94832bc 100644 --- ql/src/test/results/clientpositive/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/vectorization_nested_udf.q.out @@ -38,6 +38,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out 
ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index d857cb0..0616573 100644 --- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -1240,6 +1240,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index e9429a8..8cd0145 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -95,6 +95,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out index 438e564..52d7d8e 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out @@ -116,6 +116,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out index 8d47d1c..cb90b13 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out @@ -133,6 +133,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -308,6 +310,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -483,6 +487,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out index c1f2d54..4e011e4 100644 --- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out @@ -360,6 +360,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: _col0 (type: tinyint) diff --git ql/src/test/results/clientpositive/vectorized_timestamp.q.out ql/src/test/results/clientpositive/vectorized_timestamp.q.out index da869ce..eafa0e0 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp.q.out @@ -135,6 +135,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -323,6 +325,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -430,6 +434,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index fc7ad07..ea6bf8a 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -934,6 +934,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1041,6 +1043,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1161,6 +1165,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index 0e147be..574b4a8 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -26,10 +26,13 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; +import org.apache.commons.lang.StringUtils; import org.apache.tools.ant.BuildException; import org.apache.tools.ant.Task; @@ -1261,6 +1264,63 @@ //template, , , {"VectorUDAFVarMerge", "VectorUDAFVarPartial2", "PARTIAL2"}, {"VectorUDAFVarMerge", "VectorUDAFVarFinal", "FINAL"}, + + // Native Vector GROUP BY Single "COUNT" Aggregation. + {"GroupByHashSingleKeyCountColumnOperator", "VectorGroupByHash", "Long", "KeyCountColumnOperator", "Count"}, + {"GroupByHashSingleKeyCountColumnOperator", "VectorGroupByHash", "String", "KeyCountColumnOperator", "Count"}, + {"GroupByHashSingleKeyCountColumnOperator", "VectorGroupByHash", "Single", "KeyCountColumnOperator", "Count"}, + + {"GroupByHashSingleKeyCountKeyOperator", "VectorGroupByHash", "Long", "KeyCountKeyOperator", "Count"}, + {"GroupByHashSingleKeyCountKeyOperator", "VectorGroupByHash", "String", "KeyCountKeyOperator", "Count"}, + {"GroupByHashSingleKeyCountKeyOperator", "VectorGroupByHash", "Single", "KeyCountKeyOperator", "Count"}, + + {"GroupByHashSingleKeyCountStarOperator", "VectorGroupByHash", "Long", "KeyCountStarOperator", "Count"}, + {"GroupByHashSingleKeyCountStarOperator", "VectorGroupByHash", "String", "KeyCountStarOperator", "Count"}, + {"GroupByHashSingleKeyCountStarOperator", "VectorGroupByHash", "Single", "KeyCountStarOperator", "Count"}, + + + {"GroupByHashSingleKeyDecimal64Operator", "VectorGroupByHash", "Decimal64", "KeyCountColumnOperator", "Count"}, + {"GroupByHashSingleKeyDecimal64Operator", "VectorGroupByHash", "Decimal64", "KeyCountKeyOperator", "Count"}, + {"GroupByHashSingleKeyDecimal64Operator", "VectorGroupByHash", "Decimal64", "KeyCountStarOperator", "Count"}, + + {"GroupByHashMultiKeyCountColumnOperator", "VectorGroupByHash", "Multi", "KeyCountColumnOperator", "Count"}, + {"GroupByHashMultiKeyCountKeyOperator", "VectorGroupByHash", "Multi", "KeyCountKeyOperator", "Count"}, + {"GroupByHashMultiKeyCountStarOperator", "VectorGroupByHash", "Multi", "KeyCountStarOperator", "Count"}, + + // Native Vector GROUP BY Duplicate Reduction. 
+ {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "Long", "KeyDuplicateReductionOperator", "DuplicateReduction"}, + {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "String", "KeyDuplicateReductionOperator", "DuplicateReduction"}, + {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "Single", "KeyDuplicateReductionOperator", "DuplicateReduction"}, + + {"GroupByHashSingleKeyDecimal64Operator", "VectorGroupByHash", "Decimal64", "KeyDuplicateReductionOperator", "DuplicateReduction"}, + + {"GroupByHashMultiKeyDuplicateReductionOperator", "VectorGroupByHash", "Multi", "KeyDuplicateReductionOperator", "DuplicateReduction"}, + + // Native Vector GROUP BY Single MIN, MAX, SUM Aggregation. + {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Long", "KeyLongMaxColumnOperator", "Max", "Long"}, + {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "String", "KeyLongMaxColumnOperator", "Max", "Long"}, + {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Single", "KeyLongMaxColumnOperator", "Max", "Long"}, + + // {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Long", "KeyMaxDoubleColumnOperator", "Max", "Double"}, + // {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "String", "KeyDoubleMaxColumnOperator", "Max", "Double"}, + // {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Single", "KeyDoubleMaxColumnOperator", "Max", "Double"}, + + {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Long", "KeyLongMinColumnOperator", "Min", "Long"}, + {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "String", "KeyLongMinColumnOperator", "Min", "Long"}, + {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Single", "KeyLongMinColumnOperator", "Min", "Long"}, + + // {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Long", "KeyMinDoubleColumnOperator", "Min", "Double"}, + // {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "String", "KeyDoubleMinColumnOperator", "Min", "Double"}, + // {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Single", "KeyDoubleMinColumnOperator", "Min", "Double"}, + + {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Long", "KeyLongSumColumnOperator", "Sum", "Long"}, + {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "String", "KeyLongSumColumnOperator", "Sum", "Long"}, + {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Single", "KeyLongSumColumnOperator", "Sum", "Long"}, + + // {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Long", "KeySumDoubleColumnOperator", "Sum", "Double"}, + // {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "String", "KeyDoubleSumColumnOperator", "Sum", "Double"}, + // {"GroupByHashSingleKeyWordColumnOperator", "VectorGroupByHash", "Single", "KeyDoubleSumColumnOperator", "Sum", "Double"}, + }; @@ -1273,6 +1333,11 @@ private String udafOutputDirectory; private String udafClassesDirectory; private String udafTemplateDirectory; + + private String groupByOperatorOutputDirectory; + private String groupByOperatorClassesDirectory; + private String groupByOperatorTemplateDirectory; + private GenVectorTestCode testCodeGen; static String joinPath(String...parts) { @@ -1309,6 +1374,16 @@ public void init(String templateBaseDir, String buildDir) { udafTemplateDirectory = joinPath(generationDirectory.getAbsolutePath(), "UDAFTemplates"); + String groupByOperator = 
joinPath("org", "apache", "hadoop", + "hive", "ql", "exec", "vector", "groupby", "operator", "gen"); + File groupByOperatorOutput = new File(joinPath(buildPath, groupByOperator)); + File groupByOperatorClasses = new File(joinPath(compiledPath, groupByOperator)); + groupByOperatorOutputDirectory = groupByOperatorOutput.getAbsolutePath(); + groupByOperatorClassesDirectory = groupByOperatorClasses.getAbsolutePath(); + + groupByOperatorTemplateDirectory = + joinPath(generationDirectory.getAbsolutePath(), "GroupByOperatorTemplates"); + File testCodeOutput = new File( joinPath(buildDir, "generated-test-sources", "java", "org", @@ -1556,6 +1631,19 @@ private void generate() throws Exception { } else if (tdesc[0].equals("TimestampArithmeticDate")) { generateTimestampArithmeticDate(tdesc); + } else if ( + tdesc[0].equals("GroupByHashSingleKeyOperatorBase") || + tdesc[0].equals("GroupByHashSingleKeyDecimal64Operator") || + tdesc[0].equals("GroupByHashSingleKeyCountColumnOperator") || + tdesc[0].equals("GroupByHashSingleKeyCountKeyOperator") || + tdesc[0].equals("GroupByHashSingleKeyCountStarOperator") || + tdesc[0].equals("GroupByHashMultiKeyCountColumnOperator") || + tdesc[0].equals("GroupByHashMultiKeyCountKeyOperator") || + tdesc[0].equals("GroupByHashMultiKeyCountStarOperator") || + tdesc[0].equals("GroupByHashSingleKeyDuplicateReductionOperator") || + tdesc[0].equals("GroupByHashMultiKeyDuplicateReductionOperator") || + tdesc[0].equals("GroupByHashSingleKeyWordColumnOperator")) { + generateGroupByOperator(tdesc); } else { continue; } @@ -3731,16 +3819,59 @@ private static boolean isTimestampIntervalType(String type) { || type.equals("interval_day_time")); } - private boolean containsDefinedStrings(Set defineSet, String commaDefinedString) { - String[] definedStrings = commaDefinedString.split(","); - boolean result = false; - for (String definedString : definedStrings) { - if (defineSet.contains(definedString)) { - result = true; - break; - } + private void generateGroupByOperator(String[] tdesc) throws Exception { + String templateName = tdesc[0]; + String classNamePrefix = tdesc[1]; + String singleKeyVariation = tdesc[2]; + String classNameSuffix = tdesc[3]; + String aggregationVariation = tdesc[4]; + final boolean isAggregate = + (aggregationVariation.equals("Min") || + aggregationVariation.equals("Max") || + aggregationVariation.equals("Sum")); + final String aggregateDataType; + final String aggregateColumnVectorType; + if (isAggregate) { + aggregateDataType = tdesc[5]; + aggregateColumnVectorType = aggregateDataType + "ColumnVector"; + } else { + aggregateDataType = ""; + aggregateColumnVectorType = ""; + } + + //Read the template into a string; + String className = classNamePrefix + singleKeyVariation + classNameSuffix; + File templateFile = + new File(joinPath(this.groupByOperatorTemplateDirectory, templateName + ".txt")); + String templateString = readFile(templateFile); + + String defineName = singleKeyVariation.toUpperCase() + "_KEY"; + if (isAggregate) { + defineName += "," + aggregationVariation.toUpperCase(); + } + templateString = evaluateIfDefined(templateString, defineName, + this.groupByOperatorTemplateDirectory); + + templateString = templateString.replaceAll("", className); + final String keyColumnVectorType; + if (singleKeyVariation.equals("Long") || singleKeyVariation.equals("Decimal64")) { + keyColumnVectorType = "LongColumnVector"; + } else if (singleKeyVariation.equals("String")) { + keyColumnVectorType = "BytesColumnVector"; + } else { + keyColumnVectorType = 
"ColumnVector"; } - return result; + templateString = templateString.replaceAll("", singleKeyVariation); + templateString = templateString.replaceAll("", singleKeyVariation.toLowerCase()); + templateString = templateString.replaceAll("", aggregationVariation); + templateString = templateString.replaceAll("", aggregationVariation.toLowerCase()); + templateString = templateString.replaceAll("", keyColumnVectorType); + templateString = templateString.replaceAll("", classNameSuffix); + templateString = templateString.replaceAll("", aggregateDataType.toLowerCase()); + templateString = templateString.replaceAll("", aggregateColumnVectorType); + + writeFile(templateFile.lastModified(), groupByOperatorOutputDirectory, groupByOperatorClassesDirectory, + className, templateString); } private boolean matchesDefinedStrings(Set defineSet, Set newIfDefinedSet, @@ -3797,27 +3928,43 @@ private IfDefinedMode parseIfDefinedMode(String newIfDefinedString, Set return ifDefinedMode; } - private int doIfDefinedStatement(String[] lines, int index, Set desiredIfDefinedSet, - boolean outerInclude, StringBuilder sb) { - String ifLine = lines[index]; + private int doIfDefinedStatement(List linesList, int index, + Set desiredIfDefinedSet, boolean outerInclude, + List ifDefinedEvaluatedLinesList, boolean isExactFilter, boolean filterPredicate) { + String ifLine = linesList.get(index); final int ifLineNumber = index + 1; - String ifDefinedString = ifLine.substring("#IF ".length()); + String ifDefinedString = ifLine.substring("#IF ".length()); Set ifDefinedSet = new HashSet(); IfDefinedMode ifDefinedMode = parseIfDefinedMode(ifDefinedString, ifDefinedSet); - boolean includeBody = matchesDefinedStrings(desiredIfDefinedSet, ifDefinedSet, ifDefinedMode); + + boolean includeBody; + final boolean isExactMatch; + if (isExactFilter) { + + // Normally, we throw away any #IF statements that don't match the desired set. + // But optionally, we filter on exact #IF/@ELSE/#ENDIF statements and let all others through. + isExactMatch = desiredIfDefinedSet.equals(ifDefinedSet); + if (isExactMatch) { + includeBody = filterPredicate; + } else { + includeBody = true; + } + } else { + includeBody = matchesDefinedStrings(desiredIfDefinedSet, ifDefinedSet, ifDefinedMode); + isExactMatch = false; + } index++; - final int end = lines.length; + final int end = linesList.size(); while (true) { if (index >= end) { throw new RuntimeException("Unmatched #IF at line " + index + " for " + ifDefinedString); } - String line = lines[index]; + String line = linesList.get(index); if (line.length() == 0 || line.charAt(0) != '#') { if (outerInclude && includeBody) { - sb.append(line); - sb.append("\n"); + ifDefinedEvaluatedLinesList.add(line); } index++; continue; @@ -3828,10 +3975,18 @@ private int doIfDefinedStatement(String[] lines, int index, Set desiredI // Recurse. index = doIfDefinedStatement( - lines, index, desiredIfDefinedSet, outerInclude && includeBody, sb); + linesList, index, desiredIfDefinedSet, outerInclude && includeBody, + ifDefinedEvaluatedLinesList, isExactFilter, filterPredicate); } else if (line.equals("#ELSE")) { + // Flip inclusion. 
- includeBody = !includeBody; + if (isExactFilter) { + if (isExactMatch) { + includeBody = !includeBody; + } + } else { + includeBody = !includeBody; + } index++; } else if (line.equals("#ENDIF")) { throw new RuntimeException("Missing defined strings with #ENDIF on line " + (index + 1)); @@ -3843,48 +3998,355 @@ private int doIfDefinedStatement(String[] lines, int index, Set<String> desiredI " do not match \"" + ifDefinedString + "\" (line " + (index + 1) + ")"); } return ++index; + } else if ( + !line.startsWith("#BEGIN_LINES ") && + !line.startsWith("#END_LINES") && + !line.startsWith("#USE_LINES ") && + !line.startsWith("#COMMENT")) { + throw new RuntimeException( + "Problem with #IF #ELSE #ENDIF on line " + (index + 1) + ": " + line); } else { - throw new RuntimeException("Problem with #IF/#ELSE/#ENDIF on line " + (index + 1) + ": " + line); + if (outerInclude && includeBody) { + ifDefinedEvaluatedLinesList.add(line); + } + index++; + continue; + } } } - private void doEvaluateIfDefined(String[] lines, int index, Set<String> definedSet, - boolean outerInclude, StringBuilder sb) { - final int end = lines.length; - while (true) { - if (index >= end) { - break; + private void doProcessIfDefined(List<String> linesList, int index, Set<String> definedSet, + boolean outerInclude, List<String> ifDefinedEvaluatedLinesList, + boolean isExactFilter, boolean predicate) { + final int end = linesList.size(); + while (true) { + if (index >= end) { + break; + } + String line = linesList.get(index); + if (line.length() == 0 || line.charAt(0) != '#') { + if (outerInclude) { + ifDefinedEvaluatedLinesList.add(line); } - String line = lines[index]; - if (line.length() == 0 || line.charAt(0) != '#') { - if (outerInclude) { - sb.append(line); - sb.append("\n"); + index++; + continue; + } + + if (line.startsWith("#IF ")) { + + // A pound # statement (#IF #ELSE #ENDIF). + index = + doIfDefinedStatement( + linesList, index, definedSet, outerInclude, + ifDefinedEvaluatedLinesList, isExactFilter, predicate); + } else if ( + !line.startsWith("#BEGIN_LINES ") && + !line.startsWith("#END_LINES") && + !line.startsWith("#USE_LINES ") && + !line.startsWith("#COMMENT")) { + throw new RuntimeException( + "Problem with #IF #ELSE #ENDIF on line " + (index + 1) + ": " + line); + } else { + if (outerInclude) { + ifDefinedEvaluatedLinesList.add(line); + } + index++; + } + } + } + + private void doUseLinesCollectAndFilter(List<String> linesList, + Map<String, List<String>> useLinesMap, List<String> filteredLinesList) { + + int index = 0; + final int size = linesList.size(); + while (true) { + + if (index >= size) { + return; + } + String line = linesList.get(index); + if (line.startsWith("#BEGIN_LINES ")) { + + final int beginLineIndex = index; + String linesTitle = line.substring("#BEGIN_LINES ".length()); + if (useLinesMap.containsKey(linesTitle)) { + throw new RuntimeException( + "Problem #BEGIN_LINES that started at " + beginLineIndex + + " -- duplicate name " + linesTitle); + } + while (true) { + if (index >= size) { + throw new RuntimeException( + "Problem #BEGIN_LINES that started at " + beginLineIndex + + " -- no matching #END_LINES found"); + } + line = linesList.get(index); + if (line.startsWith("#END_LINES")) { + useLinesMap.put(linesTitle, linesList.subList(beginLineIndex + 1, index)); + break; } index++; } + } else if (line.startsWith("#COMMENT")) { + // Filter out comment lines.
+ } else { + filteredLinesList.add(line); + } + index++; + } + } + + private void doUseLinesApply(List<String> linesList, Map<String, List<String>> useLinesMap, + List<String> resultLinesList) { + + int index = 0; + final int size = linesList.size(); + while (true) { - // A pound # statement (IF/ELSE/ENDIF). - if (line.startsWith("#IF ")) { - index = doIfDefinedStatement(lines, index, definedSet, outerInclude, sb); + if (index >= size) { + return; + } + String line = linesList.get(index); + if (line.startsWith("#USE_LINES ")) { + + String linesTitle = line.substring("#USE_LINES ".length()); + final int blankCharIndex = linesTitle.indexOf(" "); + int pad = 0; + if (blankCharIndex != -1) { + String remainder = linesTitle.substring(blankCharIndex + 1); + linesTitle = linesTitle.substring(0, blankCharIndex); + if (!remainder.startsWith("+")) { + throw new RuntimeException( + "Problem #USE_LINES that started at " + index + + " -- expecting + sign for indent"); + } + String padString = remainder.substring(1); + pad = Integer.valueOf(padString); + } + List<String> useLines = useLinesMap.get(linesTitle); + if (useLines == null) { + throw new RuntimeException( + "Problem #USE_LINES that started at " + index + + " -- name " + linesTitle + " not found"); + } + if (pad == 0) { + resultLinesList.addAll(useLines); } else { - throw new RuntimeException("Problem with #IF/#ELSE/#ENDIF on line " + (index + 1) + ": " + line); + String padoutString = StringUtils.leftPad("", pad); + for (String useLine : useLines) { + if (useLine.length() > 0) { + resultLinesList.add(padoutString + useLine); + } else { + // Do not pad out empty lines. + resultLinesList.add(useLine); + } + } } + } else { + resultLinesList.add(line); } + index++; + } } - private String evaluateIfDefined(String linesString, List<String> definedList) { + private void doIncludeProcessing(String[] lines, String templateDirectory, + List<String> resultList) throws IOException { + + // Just one level. + int index = 0; + final int size = lines.length; + while (true) { + + if (index >= size) { + return; + } + String line = lines[index]; + if (line.startsWith("#INCLUDE ")) { + String includeFileName = line.substring("#INCLUDE ".length()); + File includeFile; + String includeString; + final int blankCharIndex = includeFileName.indexOf(" "); + if (blankCharIndex != -1) { + String remainder = includeFileName.substring(blankCharIndex + 1); + includeFileName = includeFileName.substring(0, blankCharIndex); + + includeFile = + new File(joinPath(templateDirectory, includeFileName + ".txt")); + includeString = readFile(includeFile); + + // Process optional comma separated parameters. + String[] parameters = remainder.split(","); + List<String> filterIfDefinedList = new ArrayList<String>(); + List<Boolean> filterIfPredicateList = new ArrayList<Boolean>(); + List<String> substitutionNames = new ArrayList<String>(); + List<String> substitutions = new ArrayList<String>(); + for (String parameter : parameters) { + Character firstChar = parameter.charAt(0); + if (Character.isUpperCase(firstChar)) { + + // #IF filter.
+ final int equalsCharIndex = parameter.indexOf("="); + if (equalsCharIndex == -1) { + throw new RuntimeException( + "Problem #INCLUDE #IF filter " + index + + " -- no '='"); + } + String filterIfDefinedName = parameter.substring(0, equalsCharIndex); + String predicateString = parameter.substring(equalsCharIndex + 1); + final boolean predicate; + if (predicateString.equalsIgnoreCase("true")) { + predicate = true; + } else if (predicateString.equalsIgnoreCase("false")) { + predicate = false; + } else { + throw new RuntimeException( + "Problem #INCLUDE #IF filter " + index + + " -- expecting 'true' or 'false'"); + } + + filterIfDefinedList.add(filterIfDefinedName); + filterIfPredicateList.add(predicate); + } else if (firstChar == '<') { + + // Substitution. + final int closeCharIndex = parameter.indexOf(">"); + if (closeCharIndex == -1) { + throw new RuntimeException( + "Problem #INCLUDE substitution specification " + index + + " -- no '>'"); + } + // Keep <>. + String substitutionName = parameter.substring(0, closeCharIndex + 1); + + Character equalsChar = parameter.charAt(closeCharIndex + 1); + if (equalsChar != '=') { + throw new RuntimeException( + "Problem #INCLUDE substitution specification " + index + + " -- not '='"); + } + final int substitutionIndex = closeCharIndex + 2; + Character startQuote = parameter.charAt(substitutionIndex); + if (startQuote != '"') { + throw new RuntimeException( + "Problem #INCLUDE substitution specification " + index + + " -- missing start quote '\"'"); + } + final int parameterSize = parameter.length(); + Character endQuote = parameter.charAt(parameterSize - 1); + if (endQuote != '"') { + throw new RuntimeException( + "Problem #INCLUDE substitution specification " + index + + " -- missing end quote '\"'"); + } + String substitution = parameter.substring(substitutionIndex + 1, parameterSize - 1); + + substitutionNames.add(substitutionName); + substitutions.add(substitution); + } + } + + // Example: + // + // #INCLUDE file LOGICAL_BATCH_PROCESSING=true,<Logical>="Logical",<logical>="logical" + // + final int filterCount = filterIfDefinedList.size(); + for (int f = 0; f < filterCount; f++) { + + // Only process any #IF/#ELSE/#ENDIF that are exact matches.
+ includeString = + exactFilterIfDefined( + includeString, filterIfDefinedList.get(f), filterIfPredicateList.get(f)); + } + final int substitutionCount = substitutionNames.size(); + for (int s = 0; s < substitutionCount; s++) { + includeString = + includeString.replaceAll( + substitutionNames.get(s), substitutions.get(s)); + } + } else { + includeFile = + new File(joinPath(templateDirectory, includeFileName + ".txt")); + includeString = readFile(includeFile); + } + String[] includeLines = includeString.split("\n"); + List<String> includeLinesList = Arrays.asList(includeLines); + resultList.addAll(includeLinesList); + } else { + resultList.add(line); + } + index++; + } + } + + private String processIfDefined(String linesString, List<String> definedList, + String templateDirectory) throws IOException { + return processIfDefined( + linesString, definedList, templateDirectory, + /* isExactFilter */ false, /* filterPredicate */ false); + } + + private String processIfDefined(String linesString, List<String> definedList, + String templateDirectory, boolean isExactFilter, boolean filterPredicate) throws IOException { + String[] lines = linesString.split("\n"); Set<String> definedSet = new HashSet<String>(definedList); + + List<String> includedLinesList; + if (templateDirectory == null) { + includedLinesList = Arrays.asList(lines); + } else { + includedLinesList = new ArrayList<String>(); + doIncludeProcessing(lines, templateDirectory, includedLinesList); + } + + List<String> ifDefinedEvaluatedLinesList = new ArrayList<String>(); + doProcessIfDefined( + includedLinesList, 0, definedSet, true, ifDefinedEvaluatedLinesList, + isExactFilter, filterPredicate); + + Map<String, List<String>> useLinesMap = new HashMap<String, List<String>>(); + List<String> filteredLinesList = new ArrayList<String>(); + doUseLinesCollectAndFilter(ifDefinedEvaluatedLinesList, useLinesMap, filteredLinesList); + + List<String> resultLinesList; + if (useLinesMap.isEmpty()) { + resultLinesList = filteredLinesList; + } else { + resultLinesList = new ArrayList<String>(); + doUseLinesApply(filteredLinesList, useLinesMap, resultLinesList); + } + StringBuilder sb = new StringBuilder(); - doEvaluateIfDefined(lines, 0, definedSet, true, sb); + for (String line : resultLinesList) { sb.append(line); sb.append("\n"); } return sb.toString(); } - private String evaluateIfDefined(String linesString, String definedString) { - return evaluateIfDefined(linesString, Arrays.asList(definedString.split(","))); + private String evaluateIfDefined(String linesString, List<String> definedList) + throws IOException { + return processIfDefined(linesString, definedList, null); + } + + private String evaluateIfDefined(String linesString, String definedString) + throws IOException { + return processIfDefined( + linesString, Arrays.asList(definedString.split(",")), null); + } + + private String exactFilterIfDefined(String linesString, String definedString, + boolean filterPredicate) + throws IOException { + return processIfDefined( + linesString, Arrays.asList(definedString.split(",")), null, true, filterPredicate); + } + + private String evaluateIfDefined(String linesString, String definedString, + String templateDirectory) throws IOException { + return processIfDefined( + linesString, Arrays.asList(definedString.split(",")), templateDirectory); } static void writeFile(long templateTime, String outputDir, String classesDir,
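The template-expansion rows added to GenVectorCode.java drive the per-variation class generation: generateGroupByOperator() concatenates columns 1-3 of a descriptor row into a class name and substitutes it into the template text. A minimal illustrative sketch of that mechanic follows; the <ClassName> token is an assumption inferred from the replaceAll() calls above, and DescriptorSketch is a hypothetical stand-alone class, not part of the patch.

    // Sketch: how one descriptor row becomes a generated class name.
    public class DescriptorSketch {
      public static void main(String[] args) {
        String[] tdesc = {
            "GroupByHashSingleKeyCountColumnOperator", // template file name
            "VectorGroupByHash",                       // class name prefix
            "Long",                                    // key variation
            "KeyCountColumnOperator",                  // class name suffix
            "Count"};                                  // aggregation variation
        // className = prefix + variation + suffix, as in generateGroupByOperator().
        String className = tdesc[1] + tdesc[2] + tdesc[3];
        // Hypothetical one-line template; <ClassName> is the assumed token name.
        String template = "public class <ClassName> { /* generated body */ }";
        System.out.println(template.replaceAll("<ClassName>", className));
        // prints: public class VectorGroupByHashLongKeyCountColumnOperator { /* generated body */ }
      }
    }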
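doIfDefinedStatement() keeps an #IF body only when the template's defined strings match the desired set, and #ELSE flips inclusion for the rest of the block. A minimal, non-nested sketch of that behavior under the assumption of single-name #IF directives (the patch additionally supports AND/OR defined-string modes, nesting, and the exact-filter mode used by #INCLUDE):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class IfDefinedSketch {
      // Keep lines whose enclosing #IF name is in the defined set; #ELSE flips inclusion.
      static String evaluate(String template, Set<String> defined) {
        StringBuilder sb = new StringBuilder();
        boolean include = true;
        for (String line : template.split("\n")) {
          if (line.startsWith("#IF ")) {
            include = defined.contains(line.substring("#IF ".length()));
          } else if (line.equals("#ELSE")) {
            include = !include;
          } else if (line.startsWith("#ENDIF")) {
            include = true;
          } else if (include) {
            sb.append(line).append('\n');
          }
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        String template =
            "#IF LONG_KEY\n" +
            "long key = keyVector[i];\n" +
            "#ELSE\n" +
            "byte[] key = keyBytes[i];\n" +
            "#ENDIF LONG_KEY\n";
        // Prints only the long-key branch.
        System.out.print(evaluate(template, new HashSet<>(Arrays.asList("LONG_KEY"))));
      }
    }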
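doUseLinesApply() splices a block captured between #BEGIN_LINES <name> and #END_LINES at each "#USE_LINES <name> +<pad>" site, left-padding non-empty lines by the requested indent. A small sketch of just that padding rule; it substitutes String.format for the patch's StringUtils.leftPad so it runs without commons-lang:

    import java.util.Arrays;
    import java.util.List;

    public class UseLinesPadSketch {
      public static void main(String[] args) {
        List<String> body = Arrays.asList("sum += value;", "", "count++;");
        int pad = 4; // as parsed from a hypothetical "#USE_LINES body +4"
        String padout = String.format("%" + pad + "s", ""); // four spaces
        for (String line : body) {
          // Empty lines are deliberately not padded, matching doUseLinesApply().
          System.out.println(line.isEmpty() ? line : padout + line);
        }
      }
    }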