diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 536c7b4..192d4c4 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3455,6 +3455,23 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Exceeding this will trigger a flush irrelevant of memory pressure condition."), HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1, "Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded."), + HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED( + "hive.vectorized.execution.groupby.native.enabled", true, + "This flag should be set to true to enable the native vectorization of queries using GroupBy.\n" + + "The default value is true."), + HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE( + "hive.test.vectorized.execution.groupby.native.override", + "none", new StringSet("none", "enable", "disable"), + "internal use only, used to override the hive.vectorized.execution.groupby.native.enabled\n" + + "setting. Using enable will force it on and disable will force it off.\n" + + "The default of none means do nothing.", + true), + HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE( + "hive.test.vectorized.groupby.native.max.memory.available", -1, + "internal use only, used for creating different vectorized hash table sizes\n" + + "to exercise more logic.\n" + + "The default value is -1, which means don't use it.", + true), HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED("hive.vectorized.execution.reducesink.new.enabled", true, "This flag should be set to true to enable the new vectorization\n" + "of queries using ReduceSink.\n" + diff --git data/files/groupby_decimal64_1a.txt data/files/groupby_decimal64_1a.txt new file mode 100644 index 0000000..dbe0d86 --- /dev/null +++ data/files/groupby_decimal64_1a.txt @@ -0,0 +1,18 @@ +55.33 +44.2 +435.33 +324.33 +324.33 +-0.342 +44.2 +55.3 +55.3 +0.0 +66.4 +23.22 +-87.2 +\N +33.44 +55.3 +435.331 +-0.342 \ No newline at end of file diff --git data/files/groupby_decimal64_1a_nonull.txt data/files/groupby_decimal64_1a_nonull.txt new file mode 100644 index 0000000..16ae9e4 --- /dev/null +++ data/files/groupby_decimal64_1a_nonull.txt @@ -0,0 +1,17 @@ +55.33 +44.2 +435.33 +324.33 +324.33 +-0.342 +44.2 +55.3 +55.3 +0.0 +66.4 +23.22 +-87.2 +33.44 +55.3 +435.331 +-0.342 \ No newline at end of file diff --git data/files/groupby_decimal64_1b.txt data/files/groupby_decimal64_1b.txt new file mode 100644 index 0000000..c99fd34 --- /dev/null +++ data/files/groupby_decimal64_1b.txt @@ -0,0 +1,17 @@ +4143-07-08 10:53:27.252,3566.02 +5339-02-01 14:10:01.0,7286.29 +5339-02-01 14:10:01.0,2755.40 +2003-09-23 22:33:17.00003252,2516.50 +5397-07-13 07:12:32.000896438,16966.99 +4143-07-08 10:53:27.252,16966.0 +4143-07-08 10:53:27.252,10402 +2003-09-23 22:33:17.00003252,1735.22 +1966-08-16 13:36:50.1,645.07 +\N,15464.67 +1966-08-16 13:36:50.1,8925.82 +1966-08-16 13:36:50.1,11041.91 +7160-12-02 06:00:24.81,645.93 +1976-05-06 00:42:30.910786948,13831.90 +9075-06-13 16:20:09,9559.53 +1985-07-20 09:30:11.0,\N +1999-10-03 16:59:10.396903939,2755.9 \ No newline at end of file diff --git data/files/groupby_decimal64_1b_nonull.txt data/files/groupby_decimal64_1b_nonull.txt new file mode 100644 index 0000000..974cb9d --- /dev/null +++ data/files/groupby_decimal64_1b_nonull.txt @@ -0,0 +1,16 @@ +4143-07-08 10:53:27.252,3566.02 +5339-02-01
14:10:01.0,7286.29 +5339-02-01 14:10:01.0,2755.40 +2003-09-23 22:33:17.00003252,2516.50 +5397-07-13 07:12:32.000896438,16966.99 +4143-07-08 10:53:27.252,16966.0 +4143-07-08 10:53:27.252,10402 +2003-09-23 22:33:17.00003252,1735.22 +1966-08-16 13:36:50.1,645.07 +\N,15464.67 +1966-08-16 13:36:50.1,8925.82 +1966-08-16 13:36:50.1,11041.91 +7160-12-02 06:00:24.81,645.93 +1976-05-06 00:42:30.910786948,13831.90 +9075-06-13 16:20:09,9559.53 +1999-10-03 16:59:10.396903939,2755.9 \ No newline at end of file diff --git data/files/groupby_long_1a.txt data/files/groupby_long_1a.txt new file mode 100644 index 0000000..8cf831f --- /dev/null +++ data/files/groupby_long_1a.txt @@ -0,0 +1,11 @@ +-5310365297525168078 +-6187919478609154811 +968819023021777205 +3313583664488247651 +-5206670856103795573 +\N +-6187919478609154811 +1569543799237464101 +-6187919478609154811 +-8460550397108077433 +-6187919478609154811 diff --git data/files/groupby_long_1a_nonull.txt data/files/groupby_long_1a_nonull.txt new file mode 100644 index 0000000..b2325ad --- /dev/null +++ data/files/groupby_long_1a_nonull.txt @@ -0,0 +1,10 @@ +1569543799237464101 +-6187919478609154811 +968819023021777205 +-8460550397108077433 +-6187919478609154811 +-5310365297525168078 +-6187919478609154811 +-5206670856103795573 +3313583664488247651 +-6187919478609154811 diff --git data/files/groupby_long_1b.txt data/files/groupby_long_1b.txt new file mode 100644 index 0000000..87c2b3c --- /dev/null +++ data/files/groupby_long_1b.txt @@ -0,0 +1,13 @@ +\N +31713 +31713 +31713 +31713 +32030 +31713 +-25394 +31713 +31713 +31713 +31713 +31713 diff --git data/files/groupby_long_1b_nonull.txt data/files/groupby_long_1b_nonull.txt new file mode 100644 index 0000000..0b438a2 --- /dev/null +++ data/files/groupby_long_1b_nonull.txt @@ -0,0 +1,12 @@ +31713 +31713 +31713 +31713 +32030 +31713 +-25394 +31713 +31713 +31713 +31713 +31713 diff --git data/files/groupby_long_1c.txt data/files/groupby_long_1c.txt new file mode 100644 index 0000000..2d13c26 --- /dev/null +++ data/files/groupby_long_1c.txt @@ -0,0 +1,11 @@ +1928928239,\N +-1437463633,YYXPPCH +-1437463633,TKTKGVGFW +1725068083,MKSCCE +1928928239,\N +\N,ABBZ +1928928239,AMKTIWQ +-1437463633,JU +1928928239,VAQHVRI +-1437463633,SOWDWMS +-1437463633,\N diff --git data/files/groupby_long_1c_nonull.txt data/files/groupby_long_1c_nonull.txt new file mode 100644 index 0000000..f6bc6e8 --- /dev/null +++ data/files/groupby_long_1c_nonull.txt @@ -0,0 +1,10 @@ +1928928239,\N +-1437463633,YYXPPCH +-1437463633,TKTKGVGFW +1725068083,MKSCCE +1928928239,\N +1928928239,AMKTIWQ +-1437463633,JU +1928928239,VAQHVRI +-1437463633,SOWDWMS +-1437463633,\N diff --git data/files/groupby_multi_1a.txt data/files/groupby_multi_1a.txt new file mode 100644 index 0000000..e41458d --- /dev/null +++ data/files/groupby_multi_1a.txt @@ -0,0 +1,56 @@ +2268-07-27,43 +1988-01-10,22 +2083-03-10,51 +2207-09-16,15 +2111-10-04,-81 +2088-05-07,-15 +1833-09-17,16 +2204-06-14,22 +1879-03-14,51 +2025-05-17,51 +2207-04-24,-92 +1809-10-10,-28 +1805-12-21,16 +2207-09-16,\N +2194-06-19,-126 +1971-06-16,24 +2251-08-16,\N +1845-11-11,-126 +1858-09-10,22 +2059-05-11,-39 +1892-05-06,-103 +2207-09-16,-13 +1937-09-06,-126 +1820-12-15,51 +2006-12-15,16 +1892-05-06,-121 +\N,-126 +2268-07-27,-12 +2268-07-27,114 +2151-11-20,16 +2268-07-27,118 +2029-11-21,-75 +1859-01-20,16 +1950-10-06,-39 +2185-07-27,51 +2207-09-16,\N +1892-05-06,61 +2207-09-16,-105 +2268-07-27,-117 +2207-04-24,0 +2207-09-16,124 +2059-05-11,-39 +1805-12-21,16 +1805-12-21,16 +2249-12-20,51 +2207-09-16,116 
+2207-09-16,122 +2064-09-04,-126 +1869-03-17,-126 +1804-02-16,-39 +1960-04-02,-75 +2086-09-20,-69 +\N,\N +2196-04-12,22 +2251-08-16,-94 +2268-07-27,-12 \ No newline at end of file diff --git data/files/groupby_multi_1a_nonull.txt data/files/groupby_multi_1a_nonull.txt new file mode 100644 index 0000000..9542f64 --- /dev/null +++ data/files/groupby_multi_1a_nonull.txt @@ -0,0 +1,55 @@ +2268-07-27,43 +1988-01-10,22 +2083-03-10,51 +2207-09-16,15 +2111-10-04,-81 +2088-05-07,-15 +1833-09-17,16 +2204-06-14,22 +1879-03-14,51 +2025-05-17,51 +2207-04-24,-92 +1809-10-10,-28 +1805-12-21,16 +2207-09-16,\N +2194-06-19,-126 +1971-06-16,24 +2251-08-16,\N +1845-11-11,-126 +1858-09-10,22 +2059-05-11,-39 +1892-05-06,-103 +2207-09-16,-13 +1937-09-06,-126 +1820-12-15,51 +2006-12-15,16 +1892-05-06,-121 +\N,-126 +2268-07-27,-12 +2268-07-27,114 +2151-11-20,16 +2268-07-27,118 +2029-11-21,-75 +1859-01-20,16 +1950-10-06,-39 +2185-07-27,51 +2207-09-16,\N +1892-05-06,61 +2207-09-16,-105 +2268-07-27,-117 +2207-04-24,0 +2207-09-16,124 +2059-05-11,-39 +1805-12-21,16 +1805-12-21,16 +2249-12-20,51 +2207-09-16,116 +2207-09-16,122 +2064-09-04,-126 +1869-03-17,-126 +1804-02-16,-39 +1960-04-02,-75 +2086-09-20,-69 +2196-04-12,22 +2251-08-16,-94 +2268-07-27,-12 \ No newline at end of file diff --git data/files/groupby_serialize_1a.txt data/files/groupby_serialize_1a.txt new file mode 100644 index 0000000..cae1ecc --- /dev/null +++ data/files/groupby_serialize_1a.txt @@ -0,0 +1,17 @@ +2061-12-19 22:10:32.000628309 +\N +2686-05-23 07:46:46.565832918 +2082-07-14 04:00:40.695380469 +2188-06-04 15:03:14.963259704 +2608-02-23 23:44:02.546440891 +2093-04-10 23:36:54.846 +2898-10-01 22:27:02.000871113 +2306-06-21 11:02:00.143124239 +\N +\N +2306-06-21 11:02:00.143124239 +2093-04-10 23:36:54.846 +\N +2686-05-23 07:46:46.565832918 +2093-04-10 23:36:54.846 +2299-11-15 16:41:30.401 diff --git data/files/groupby_serialize_1a_nonull.txt data/files/groupby_serialize_1a_nonull.txt new file mode 100644 index 0000000..0520a9a --- /dev/null +++ data/files/groupby_serialize_1a_nonull.txt @@ -0,0 +1,13 @@ +2061-12-19 22:10:32.000628309 +2686-05-23 07:46:46.565832918 +2082-07-14 04:00:40.695380469 +2188-06-04 15:03:14.963259704 +2608-02-23 23:44:02.546440891 +2093-04-10 23:36:54.846 +2898-10-01 22:27:02.000871113 +2306-06-21 11:02:00.143124239 +2306-06-21 11:02:00.143124239 +2093-04-10 23:36:54.846 +2686-05-23 07:46:46.565832918 +2093-04-10 23:36:54.846 +2299-11-15 16:41:30.401 diff --git data/files/groupby_serialize_1b.txt data/files/groupby_serialize_1b.txt new file mode 100644 index 0000000..c47bae0 --- /dev/null +++ data/files/groupby_serialize_1b.txt @@ -0,0 +1,47 @@ +2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 
20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +\N,\N,\N,-2207.3 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 +2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +\N,\N,\N,-2.4 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 \ No newline at end of file diff --git data/files/groupby_serialize_1b_nonull.txt data/files/groupby_serialize_1b_nonull.txt new file mode 100644 index 0000000..e640b42 --- /dev/null +++ data/files/groupby_serialize_1b_nonull.txt @@ -0,0 +1,66 @@ +2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 
+2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 +2512-10-06 03:03:03,1560,X,-922.6951584107 +2396-04-06 15:39:02.404013577,29661,ZSMB,0.76718326 +2409-09-23 10:33:27,2638,XSXR,0.4 +2969-01-23 14:08:04.000667259,6689,TFGVOGPJF,-0.01 +2333-07-28 09:59:26,23196,RKSK,37872288434740893.5 +2409-09-23 10:33:27,2638,XSXR,-162.95 +2357-05-08 07:09:09.000482799,6226,ZSMB,-472 +2304-12-15 15:31:16,15090,G,-4319470286240016.3 +2304-12-15 15:31:16,1301,T,61.302 +2105-01-04 16:27:45,23100,ZSMB,-83.2328 +2242-08-04 07:51:46.905,20223,UCYXACQ,-0.26149 +2637-03-12 22:25:46.385,-17786,HYEGQ,-84.169614329419 +1931-12-04 11:13:47.269597392,23196,HVJCQMTQL,-9697532.8994 +2897-08-10 15:21:47.09,23663,XYUVBED,6370 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,57.62175257788037 +2145-10-15 06:58:42.831,2638,UANGISEXR,-5996.306 +2462-12-16 23:11:32.633305644,-26373,CB,67.41799 +2396-04-06 15:39:02.404013577,29661,ZSMB,-5151598.347 +2304-12-15 15:31:16,15090,G,975 +2512-10-06 03:03:03,32099,ARNZ,-0.41 +2188-06-04 15:03:14.963259704,9468,AAA,2.75496352 \ No newline at end of file diff --git data/files/groupby_string_1a.txt data/files/groupby_string_1a.txt new file mode 100644 index 0000000..1cbcd05 --- /dev/null +++ data/files/groupby_string_1a.txt @@ -0,0 +1,13 @@ +FTWURVH +QNCYBDW +UA +WXHJ +\N +WXHJ +PXLD +WXHJ +PXLD +WXHJ +WXHJ +MXGDMBD +PXLD diff --git data/files/groupby_string_1a_nonull.txt data/files/groupby_string_1a_nonull.txt new file mode 100644 index 0000000..a6566f2 --- /dev/null +++ data/files/groupby_string_1a_nonull.txt @@ -0,0 +1,12 @@ +WXHJ +WXHJ +FTWURVH +MXGDMBD +UA +WXHJ +QNCYBDW +PXLD +PXLD +WXHJ +PXLD +WXHJ diff --git data/files/groupby_string_1c.txt data/files/groupby_string_1c.txt new file mode 100644 index 0000000..f223da0 --- /dev/null +++ data/files/groupby_string_1c.txt @@ -0,0 +1,38 @@ +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +,1985-01-22,2111-01-10 15:44:28 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +\N,1865-11-08,2893-04-07 07:36:12 +BEP,2206-08-10,2331-10-09 10:59:51 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +IWEZJHKE,\N,\N +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +KL,1980-09-22,2073-08-25 11:51:10.318 +\N,1915-02-22,2554-10-27 09:34:30 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +GOYJHW,1959-04-27,\N +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +LOTLS,2126-09-16,1977-12-15 15:28:56 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +,2021-02-21,2802-04-21 18:48:18.5933838 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +SDA,2196-04-12,2462-10-26 19:28:12.733 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 
+GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +FYW,1807-03-20,2305-08-17 01:32:44 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 diff --git data/files/groupby_string_1c_nonull.txt data/files/groupby_string_1c_nonull.txt new file mode 100644 index 0000000..6b97ef4 --- /dev/null +++ data/files/groupby_string_1c_nonull.txt @@ -0,0 +1,35 @@ +LOTLS,2126-09-16,1977-12-15 15:28:56 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +FYW,1807-03-20,2305-08-17 01:32:44 +,2021-02-21,2802-04-21 18:48:18.5933838 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +BEP,2206-08-10,2331-10-09 10:59:51 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +,1985-01-22,2111-01-10 15:44:28 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +GOYJHW,1959-04-27,\N +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +KL,1980-09-22,2073-08-25 11:51:10.318 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +IWEZJHKE,\N,\N +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 diff --git itests/cmd itests/cmd new file mode 100644 index 0000000..6672fe4 --- /dev/null +++ itests/cmd @@ -0,0 +1,140 @@ +rm ../ql/src/test/results/clientpositive/llap/parquet_analyze.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_array_map_emptynullvals.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_array_null_element.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_array_of_multi_field_struct.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_array_of_optional_elements.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_array_of_required_elements.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_array_of_single_field_struct.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_array_of_structs.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_array_of_unannotated_groups.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_array_of_unannotated_primitives.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_avro_array_of_primitives.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_avro_array_of_single_field_struct.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_columnar.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_create.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ctas.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_decimal.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_decimal1.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_external_time.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_join.q.out +rm 
../ql/src/test/results/clientpositive/llap/parquet_join2.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_map_null.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_map_of_arrays_of_ints.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_map_of_maps.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_mixed_case.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_mixed_partition_formats.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_mixed_partition_formats2.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_nested_complex.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_no_row_serde.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_partitioned.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ppd.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ppd_boolean.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ppd_char.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ppd_date.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ppd_multifiles.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ppd_partition.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ppd_timestamp.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_ppd_varchar.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown_2.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_read_backward_compatible_files.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_schema_evolution.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_serde.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_table_with_subschema.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_thrift_array_of_primitives.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_thrift_array_of_single_field_struct.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_type_promotion.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_types_non_dictionary_encoding_vectorization.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_0.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_1.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_10.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_13.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_14.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_15.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_16.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_17.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_2.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_4.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_5.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_6.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_7.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_8.q.out +rm 
../ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_decimal_date.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_div0.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_limit.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_nested_udf.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_not.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_offset_limit.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_part.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_part_project.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_part_varchar.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_vectorization_pushdown.q.out +rm ../ql/src/test/results/clientpositive/llap/parquet_write_correct_definition_levels.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_analyze.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_array_map_emptynullvals.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_array_null_element.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_array_of_multi_field_struct.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_array_of_optional_elements.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_array_of_required_elements.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_array_of_single_field_struct.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_array_of_structs.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_array_of_unannotated_groups.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_array_of_unannotated_primitives.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_avro_array_of_primitives.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_avro_array_of_single_field_struct.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_columnar.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_complex_types_vectorization.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_create.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_ctas.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_decimal.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_decimal1.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_external_time.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_join2.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_map_null.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_map_of_arrays_of_ints.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_map_of_maps.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_map_type_vectorization.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_mixed_case.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_mixed_partition_formats.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_mixed_partition_formats2.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_nested_complex.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_no_row_serde.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_partitioned.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_ppd.q.out +rm 
../ql/src/test/results/clientpositive/spark/parquet_ppd_boolean.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_ppd_char.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_ppd_date.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_ppd_decimal.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_ppd_multifiles.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_ppd_partition.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_ppd_timestamp.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_ppd_varchar.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_predicate_pushdown.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_predicate_pushdown_2.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_read_backward_compatible_files.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_schema_evolution.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_serde.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_struct_type_vectorization.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_table_with_subschema.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_thrift_array_of_primitives.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_thrift_array_of_single_field_struct.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_type_promotion.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_types.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_types_non_dictionary_encoding_vectorization.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_types_vectorization.q.out +rm ../ql/src/test/results/clientpositive/spark/parquet_write_correct_definition_levels.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_dynamic_partition_pruning.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_dynamic_semijoin_reduction.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_dynamic_semijoin_reduction2.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_insert_into_bucketed_table.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_join46.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_mapjoin3.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_multi_output_select.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_parquet.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_parquet_types.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_timestamp.q.out +rm ../ql/src/test/results/clientpositive/spark/vectorized_timestamp_ints_casts.q.out \ No newline at end of file diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java index 1f87f8d..cb7fc4e 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/operators/VectorGroupByOperatorBench.java @@ -17,6 +17,8 @@ package org.apache.hive.benchmark.vectorization.operators; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.Random; import org.apache.hadoop.conf.Configuration; @@ -38,9 +40,16 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import 
org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation.SingleCountAggregationKind; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -66,6 +75,12 @@ public class VectorGroupByOperatorBench extends AbstractOperatorBench { @Param({ + "original", + "native" + }) + private String implementation; + + @Param({ "true", "false" }) @@ -93,6 +108,7 @@ @Param({ "count", + // "countStar", "min", "max", "sum", @@ -109,6 +125,7 @@ @Param({ "bigint", + "date", "double", "string", "decimal(7,2)", // to use this via command line arg "decimal(7_2)" @@ -118,7 +135,7 @@ private String dataType; private Random rand = new Random(1234); - private VectorGroupByOperator vgo; + private Operator vgo; private VectorizedRowBatch vrb; private int size = VectorizedRowBatch.DEFAULT_SIZE; @@ -135,10 +152,62 @@ public void setup() { VectorizationContext ctx = new VectorizationContext("name", ImmutableList.of("A")); GroupByDesc desc = buildGroupByDescType(aggregation, evalMode, "A", typeInfo, processMode); Operator groupByOp = OperatorFactory.get(new CompilationOpContext(), desc); - VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc(); + VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) desc.getVectorDesc(); vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH); - vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc); - vgo.initialize(new Configuration(), null); + if (implementation == null || implementation.equalsIgnoreCase("original")) { + vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc); + vgo.initialize(new Configuration(), null); + } else if (implementation.equalsIgnoreCase("native")) { + if (!aggregation.equalsIgnoreCase("count")) { + System.out.println("Only the count aggregation is supported by the native implementation"); + System.exit(0); + } + VectorGroupByInfo vectorGroupByInfo = new VectorGroupByInfo(); + vectorGroupByInfo.setAggregationVariation(AggregationVariation.HASH_SINGLE_COUNT); + final SingleCountAggregationKind singleCountAggregationKind; + // if (desc.getAggregators().get(0).getParameters().size() == 0) { + // singleCountAggregationKind = SingleCountAggregationKind.COUNT_STAR; + // } else { + singleCountAggregationKind = SingleCountAggregationKind.COUNT_KEY; + // } + vectorGroupByInfo.setSingleCountAggregation( + new SingleCountAggregation(singleCountAggregationKind)); + + final HashTableKeyType hashTableKeyType; + switch (dataType) { + case "bigint": + case "date": + hashTableKeyType = HashTableKeyType.LONG; + break; + case "string": + hashTableKeyType = HashTableKeyType.STRING; + break; +
default: + hashTableKeyType = HashTableKeyType.SERIALIZE; + break; + } + vectorGroupByInfo.setHashTableKeyType(hashTableKeyType); + vectorGroupByInfo.setTestGroupByMaxMemoryAvailable(20000000); + + vectorGroupByDesc.setVectorGroupByInfo(vectorGroupByInfo); + + String issue = + Vectorizer.doVectorizeGroupByOperatorPreparation( + groupByOp, ctx, vectorGroupByDesc); + if (issue != null) { + System.out.println(issue); + System.exit(0); + } + vgo = + Vectorizer.specializeGroupByOperator( + groupByOp, ctx, (GroupByDesc) groupByOp.getConf(), vectorGroupByDesc); + vgo.initialize(new Configuration(), null); + } else { + System.out.println("Unknown implementation " + implementation); + System.exit(0); + } + System.out.println("implementation class " + vgo.getClass().getSimpleName()); + } catch (Exception e) { // likely unsupported combination of params // https://bugs.openjdk.java.net/browse/CODETOOLS-7901296 is not available yet to skip benchmark cleanly @@ -162,6 +231,11 @@ private GroupByDesc buildGroupByDescType( outputColumnNames.add("_col0"); GroupByDesc desc = new GroupByDesc(); + ArrayList keys = new ArrayList(); + keys.add( + new ExprNodeColumnDesc( + dataType, "A", "table", false)); + desc.setKeys(keys); desc.setVectorDesc(new VectorGroupByDesc()); desc.setOutputColumnNames(outputColumnNames); @@ -191,6 +265,9 @@ private AggregationDesc buildAggregationDesc( GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator = (GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator(); udafBloomFilterEvaluator.setHintEntries(10000); + } else if (aggregate.equals("countStar")) { + aggregate = "count"; + params = new ArrayList(); } agg.setGenericUDAFName(aggregate); agg.setMode(mode); diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 3aaa68b..1fd752a 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -299,7 +299,9 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_groupby4.q,\ vector_groupby6.q,\ vector_groupby_3.q,\ + vector_groupby_singlekey.q,\ vector_groupby_mapjoin.q,\ + vector_groupby_multikey.q,\ vector_groupby_reduce.q,\ vector_grouping_sets.q,\ vector_if_expr.q,\ diff --git ql/pom.xml ql/pom.xml index 165610f..ed0dc8a 100644 --- ql/pom.xml +++ ql/pom.xml @@ -835,6 +835,7 @@ classpath="${compile.classpath}"/> + diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCommonLines.txt new file mode 100644 index 0000000..8f7d28e --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashCommonLines.txt @@ -0,0 +1,291 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals +#COMMENT 4) Separate the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to any operator variation.
+#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any key variation specific transient variables. +#COMMENT +#BEGIN_LINES KEY_VARIATION_TRANSIENT +#IF SERIALIZE_KEY||MULTI_KEY + // Object that can take the column(s) of a row in a vectorized row batch and serialize them. + // The key is not NULL. + private transient VectorSerializeRow keyVectorSerializeWrite; + + // The BinarySortable serialization of the current key. + private transient Output currentKeyOutput; + + // The BinarySortable serialization of the next key for a possible series of equal keys. + private transient Output nextKeyOutput; + +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific Operator import code lines. +#COMMENT +#BEGIN_LINES KEY_VARIATION_OPERATOR_IMPORTS +#IF STRING_KEY +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow; +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Helpful variables for accessing the key values for the LONG and STRING variations. +#COMMENT (None needed for SERIALIZE_KEY or MULTI_KEY) +#COMMENT +#BEGIN_LINES KEY_VECTOR_VARIABLES +#IF LONG_KEY + long[] keyVector = keyColVector.vector; +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[][] keyVector = keyColVector.vector; + final int[] keyStart = keyColVector.start; + final int[] keyLength = keyColVector.length; +#ENDIF STRING_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for logical (i.e. selectedInUse) and the batch's keys have no +#COMMENT NULLs case. All variations. +#COMMENT +#BEGIN_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES + final int firstBatchIndex = selected[0]; +#IF LONG_KEY + long currentKey = keyVector[firstBatchIndex]; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey = keyVector[firstBatchIndex]; + int currentKeyStart = keyStart[firstBatchIndex]; + int currentKeyLength = keyLength[firstBatchIndex]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex); + byte[] currentKey = currentKeyOutput.getData(); + int currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for logical (i.e. selectedInUse) and the batch's keys may have +#COMMENT NULLs case. All variations.
+#COMMENT +#BEGIN_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES + boolean[] keyIsNull = keyColVector.isNull; + boolean currKeyIsNull; + +#IF LONG_KEY + long currentKey; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey; + int currentKeyStart; + int currentKeyLength; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + byte[] currentKey; + int currentKeyLength; +#ENDIF SERIALIZE_KEY + final int firstBatchIndex = selected[0]; + if (keyIsNull[firstBatchIndex]) { + currKeyIsNull = true; +#IF LONG_KEY + currentKey = 0; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = null; + currentKeyStart = 0; + currentKeyLength = 0; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + currentKey = null; + currentKeyLength = 0; +#ENDIF SERIALIZE_KEY + } else { + currKeyIsNull = false; +#IF LONG_KEY + currentKey = keyVector[firstBatchIndex]; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = keyVector[firstBatchIndex]; + currentKeyStart = keyStart[firstBatchIndex]; + currentKeyLength = keyLength[firstBatchIndex]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex); + currentKey = currentKeyOutput.getData(); + currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for physical (i.e. NOT selectedInUse) and the batch's keys have no +#COMMENT NULLs case. All variations. +#COMMENT +#BEGIN_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#IF LONG_KEY + long currentKey = keyVector[0]; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey = keyVector[0]; + int currentKeyStart = keyStart[0]; + int currentKeyLength = keyLength[0]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] currentKey = currentKeyOutput.getData(); + int currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Current key values for physical (i.e. NOT selectedInUse) and the batch's keys may have +#COMMENT NULLs case. All variations. 
+#COMMENT +#BEGIN_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES + boolean[] keyIsNull = keyColVector.isNull; + boolean currKeyIsNull; + +#IF LONG_KEY + long currentKey; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] currentKey; + int currentKeyStart; + int currentKeyLength; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + byte[] currentKey; + int currentKeyLength; +#ENDIF SERIALIZE_KEY + if (keyIsNull[0]) { + currKeyIsNull = true; +#IF LONG_KEY + currentKey = 0; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = null; + currentKeyStart = 0; + currentKeyLength = 0; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + currentKey = null; + currentKeyLength = 0; +#ENDIF SERIALIZE_KEY + } else { + currKeyIsNull = false; +#IF LONG_KEY + currentKey = keyVector[0]; +#ENDIF LONG_KEY +#IF STRING_KEY + currentKey = keyVector[0]; + currentKeyStart = keyStart[0]; + currentKeyLength = keyLength[0]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + currentKey = currentKeyOutput.getData(); + currentKeyLength = currentKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Get next key value at batchIndex. +#COMMENT All variations. +#COMMENT +#BEGIN_LINES GET_NEXT_KEY +#IF LONG_KEY + final long nextKey = keyVector[batchIndex]; +#ENDIF LONG_KEY +#IF STRING_KEY + byte[] nextKey = keyVector[batchIndex]; + final int nextKeyStart = keyStart[batchIndex]; + final int nextKeyLength = keyLength[batchIndex]; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(nextKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + final byte[] nextKey = nextKeyOutput.getData(); + final int nextKeyLength = nextKeyOutput.getLength(); +#ENDIF SERIALIZE_KEY +#IF MULTI_KEY + Not Applicable -- see MULTI_KEY_GET_NEXT_KEY instead. +#ENDIF MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT IF statement next key value equals current key value for all variations. +#COMMENT +#BEGIN_LINES IF_NEXT_EQUALS_CURRENT +#IF LONG_KEY + if (currentKey == nextKey) { +#ENDIF LONG_KEY +#IF STRING_KEY + if (StringExpr.equal( + currentKey, currentKeyStart, currentKeyLength, + nextKey, nextKeyStart, nextKeyLength)) { +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + if (StringExpr.equal( + currentKey, 0, currentKeyLength, + nextKey, 0, nextKeyLength)) { +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT ELSE IF statement next key value equals current key value for all variations. +#COMMENT +#BEGIN_LINES ELSE_IF_NEXT_EQUALS_CURRENT +#IF LONG_KEY + } else if (currentKey == nextKey) { +#ENDIF LONG_KEY +#IF STRING_KEY + } else if (StringExpr.equal( + currentKey, currentKeyStart, currentKeyLength, + nextKey, nextKeyStart, nextKeyLength)) { +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + } else if (StringExpr.equal( + currentKey, 0, currentKeyLength, + nextKey, 0, nextKeyLength)) { +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Remember the next key value as the current key value. All variations. 
+#COMMENT +#BEGIN_LINES NEW_CURRENT_KEY + currentKey = nextKey; +#IF STRING_KEY + currentKeyStart = nextKeyStart; + currentKeyLength = nextKeyLength; +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + currentKeyLength = nextKeyLength; + final Output tempOutput = nextKeyOutput; + nextKeyOutput = currentKeyOutput; + currentKeyOutput = tempOutput; +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDuplicateReductionCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDuplicateReductionCommonLines.txt new file mode 100644 index 0000000..b62fca7 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashDuplicateReductionCommonLines.txt @@ -0,0 +1,103 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals +#COMMENT 4) Separate the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to Duplicate Reduction operator variations. +#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- find or create the hash table entry. +#COMMENT All variations. +#COMMENT +#BEGIN_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED +#IF LONG_KEY + if (currentKey == 0) { + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch of no NULL keys, find or create the hash table +#COMMENT entry. All variations. +#COMMENT +#BEGIN_LINES LAST_NO_NULLS_DUPLICATE_REDUCTION_KEY +#IF LONG_KEY + if (currentKey == 0) { + + // We don't store 0 in the slot table so it can be used to indicate an empty slot.
+ haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SERIALIZE_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch which may have NULL keys, find or create the +#COMMENT hash table entry. All variations. +#COMMENT +#BEGIN_LINES LAST_NULLS_DUPLICATE_REDUCTION_KEY + if (!currKeyIsNull) { +#IF LONG_KEY + if (currentKey == 0) { + + // We don't store 0 in the slot table so it can be used to indicate an empty slot. + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength)); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + findOrCreateBytesDuplicateReductionKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength)); +#ENDIF SERIALIZE_KEY||MULTI_KEY + } +#END_LINES \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCommonLines.txt new file mode 100644 index 0000000..17e50da --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyCommonLines.txt @@ -0,0 +1,108 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals +#COMMENT 4) Separate the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to MULTI_KEY variations. +#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any multi-key variation specific initializeOp code lines.
+#COMMENT +#BEGIN_LINES MULTI_KEY_VARIATION_INITIALIZE_OP + + final int size = groupByKeyExpressions.length; + keyVectorSerializeWrite = + new VectorSerializeRow( + new BinarySortableSerializeWrite(size)); + + TypeInfo[] typeInfos = new TypeInfo[size]; + int[] columnMap = new int[size]; + for (int i = 0; i < size; i++) { + VectorExpression keyExpr = groupByKeyExpressions[i]; + typeInfos[i] = keyExpr.getOutputTypeInfo(); + columnMap[i] = keyExpr.getOutputColumnNum(); + } + keyVectorSerializeWrite.init(typeInfos, columnMap); + + currentKeyOutput = new Output(); + nextKeyOutput = new Output(); +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any multi-key variation specific next key code lines. +#COMMENT +#BEGIN_LINES MULTI_KEY_GET_NEXT_KEY + final boolean nextKeyIsNull; + final byte[] nextKey; + final int nextKeyLength; + keyVectorSerializeWrite.setOutput(nextKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + if (keyVectorSerializeWrite.getIsAllNulls()) { + nextKeyIsNull = true; + nextKey = null; + nextKeyLength = 0; + + // We note we encountered a NULL key. + haveNullKey = true; + } else { + nextKeyIsNull = false; + nextKey = nextKeyOutput.getData(); + nextKeyLength = nextKeyOutput.getLength(); + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Logical current key values for multi-key. +#COMMENT +#BEGIN_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES + boolean currKeyIsNull; + byte[] currentKey; + int currentKeyLength; + + final int firstBatchIndex = selected[0]; + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, firstBatchIndex); + if (keyVectorSerializeWrite.getIsAllNulls()) { + currKeyIsNull = true; + currentKey = null; + currentKeyLength = 0; + + // We note we encountered a NULL key. + haveNullKey = true; + } else { + currKeyIsNull = false; + currentKey = currentKeyOutput.getData(); + currentKeyLength = currentKeyOutput.getLength(); + } +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Physical current key values for multi-key. +#COMMENT +#BEGIN_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES + boolean currKeyIsNull; + byte[] currentKey; + int currentKeyLength; + + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + if (keyVectorSerializeWrite.getIsAllNulls()) { + currKeyIsNull = true; + currentKey = null; + currentKeyLength = 0; + + // We note we encountered a NULL key. + haveNullKey = true; + } else { + currKeyIsNull = false; + currentKey = currentKeyOutput.getData(); + currentKeyLength = currentKeyOutput.getLength(); + } +#END_LINES \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionInclude.txt new file mode 100644 index 0000000..31698fe --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionInclude.txt @@ -0,0 +1,67 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashMultiKeyDuplicateReductionOperator.
+#COMMENT +#COMMENT + protected void handle(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + // NULL key series. + } else { + + // Current non-NULL key ended by NULL key. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED +2 + + // New NULL key. + currKeyIsNull = true; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT +2 + + // Equal key series. + } else { + + // Current non-NULL key ended by another non-NULL key. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED +2 + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_DUPLICATE_REDUCTION_KEY + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionOperator.txt new file mode 100644 index 0000000..1ac4b74 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeyDuplicateReductionOperator.txt @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.duplicatereduction.VectorGroupByHashMultiKeyDuplicateReductionTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a multi-key Native Vectorized GroupBy with no aggregation. + * + * (For more comments, see GroupByHashSingleKeyDuplicateReductionOperator.txt). + */ +public class VectorGroupByHashMultiKeyDuplicateReductionOperator + extends VectorGroupByHashMultiKeyDuplicateReductionTable { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + +#USE_LINES KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashMultiKeyDuplicateReductionOperator() { + super(); + } + + public VectorGroupByHashMultiKeyDuplicateReductionOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + +#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashMultiKeyCommonLines +#INCLUDE GroupByHashDuplicateReductionCommonLines + +#INCLUDE GroupByHashMultiKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashMultiKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + if (batch.selectedInUse) { + handleLogical(batch, inputLogicalSize); + } else { + handlePhysical(batch, inputLogicalSize); + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. 
+ */ + @Override + protected void outputGroupBy() throws HiveException { + + if (haveNullKey) { + + // NULL entry to deal with. + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int keySize = groupByKeyExpressions.length; + final int nullBatchIndex = outputBatch.size; + for (int i = 0; i < keySize; i++) { + ColumnVector keyColumnVector = outputBatch.cols[i]; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + } + + outputBatch.size++; + } + + doOutputMultiKeys(); + } +} diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountColumnInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountColumnInclude.txt new file mode 100644 index 0000000..4dbffb4 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountColumnInclude.txt @@ -0,0 +1,292 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashMultiKeySingleCountColumnOperator. +#COMMENT +#COMMENT + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsColumn). + */ + private void doNoNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COLUMN_COUNT_KEY + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyRepeatingNullColumn). + */ + private void doRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. 
+ +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + // No counting. + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New NULL key. + currKeyIsNull = true; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + // No counting + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_KEY_COLUMN_COUNT + } + } + + /* + * Do the non-key-column NO REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyNullsColumn). + */ + private void doNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, ColumnVector nonKeyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + int count = (nonKeyIsNull[firstBatchIndex] ? 0 : 1); + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + int count = (nonKeyIsNull[0] ? 0 : 1); + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COLUMN_COUNT_KEY + } + + /* + * batch processing for NULLS key case. + * + * Both NULL and non-NULL keys will have counts for non-key-columns. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any processing for the non-key-column counting. + * + * (See the non-key column case comments for handleNoNullsKey). 
+ * + * In all cases above, when its a NULL key, do NULL entry processing. + * + */ + private void handle(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doNoNullsColumn(batch, inputLogicalSize); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doRepeatingNullColumn(batch, inputLogicalSize); + + } else { + + // Non-NULL repeating non-key column. + doNoNullsColumn(batch, inputLogicalSize); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. + + doNullsColumn(batch, inputLogicalSize, nonKeyColVector); + + } + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountColumnOperator.txt new file mode 100644 index 0000000..e85e491 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountColumnOperator.txt @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.singlecount.VectorGroupByHashMultiKeySingleCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a multi-key COUNT(non-key-column) Native Vectorized GroupBy. + * + * (For more comments, see GroupByHashSingleKeySingleCountColumnInclude.txt). + */ +public class VectorGroupByHashMultiKeySingleCountColumnOperator + extends VectorGroupByHashMultiKeySingleCountTable { + + private static final long serialVersionUID = 1L; + + protected int countColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. 
+  //---------------------------------------------------------------------------
+
+  protected transient boolean haveNullKey;
+
+  protected transient long nullKeyCount;
+
+#USE_LINES KEY_VARIATION_TRANSIENT
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashMultiKeySingleCountColumnOperator() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeySingleCountColumnOperator(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    countColumnNum = singleCountAggregation.getCountColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+    nullKeyCount = 0;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashMultiKeyCommonLines
+#INCLUDE GroupByHashSingleCountColumnCommonLines
+
+#INCLUDE GroupByHashMultiKeySingleCountColumnInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical"
+
+#INCLUDE GroupByHashMultiKeySingleCountColumnInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical"
+
+  @Override
+  protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    if (batch.selectedInUse) {
+      handleLogical(batch, inputLogicalSize);
+    } else {
+      handlePhysical(batch, inputLogicalSize);
+    }
+  }
+
+  /**
+   * Flush all of the key and count pairs of the multi-key hash table to the
+   * output.
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+      outputSingleCountForNullMultiKey(nullKeyCount);
+    }
+
+    doOutputMultiKeyAndCounts();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountKeyInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountKeyInclude.txt
new file mode 100644
index 0000000..704f28b
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountKeyInclude.txt
@@ -0,0 +1,95 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeySingleCountKeyOperator.
+#COMMENT
+#COMMENT
+  /*
+   * batch processing for the NULLS key case.
+   *
+   * For all NULL key cases we note the NULL key exists but leave its count as 0.
+   *
+   * Do find/create on each non-NULL key with its count.
+ */ + private void handle(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count; + if (currKeyIsNull) { + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } else { + count = 1; + } + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES MULTI_KEY_GET_NEXT_KEY + + if (nextKeyIsNull) { + + if (currKeyIsNull) { + + // We don't count NULLs for NULL key. + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED +2 + + // New NULL key. + currKeyIsNull = true; + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } + + } else { + + if (currKeyIsNull) { + + // Current NULL key ended. We don't count NULLs for NULL key. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED +2 + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COUNT_KEY + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountKeyOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountKeyOperator.txt new file mode 100644 index 0000000..451496f --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountKeyOperator.txt @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.singlecount.VectorGroupByHashMultiKeySingleCountTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a multi-key COUNT(key-column) Native Vectorized GroupBy. + * + * (For more comments see GroupByHashSingleKeySingleCountKeyOperator.txt). + */ +public class VectorGroupByHashMultiKeySingleCountKeyOperator + extends VectorGroupByHashMultiKeySingleCountTable { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + +#USE_LINES KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashMultiKeySingleCountKeyOperator() { + super(); + } + + public VectorGroupByHashMultiKeySingleCountKeyOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + +#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashMultiKeyCommonLines +#INCLUDE GroupByHashSingleCountKeyCommonLines + +#INCLUDE GroupByHashMultiKeySingleCountKeyInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashMultiKeySingleCountKeyInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + if (batch.selectedInUse) { + handleLogical(batch, inputLogicalSize); + } else { + handlePhysical(batch, inputLogicalSize); + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. 
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+      outputSingleCountForNullMultiKey(/* nullKeyCount */ 0);
+    }
+
+    doOutputMultiKeyAndCounts();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountStarInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountStarInclude.txt
new file mode 100644
index 0000000..ca7b953
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountStarInclude.txt
@@ -0,0 +1,87 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashMultiKeySingleCountStarOperator.
+#COMMENT
+#COMMENT
+  /*
+   * batch processing for the NULLS key case.
+   *
+   * For all NULL keys we note the NULL key exists AND count them.
+   *
+   * Do find/create on each non-NULL key with its count.
+   */
+  private void handle(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_MULTI_KEY_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+    int count = 1;
+
+#IF LOGICAL_BATCH_PROCESSING
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES MULTI_KEY_GET_NEXT_KEY
+
+      if (nextKeyIsNull) {
+
+        if (currKeyIsNull) {
+
+          count++;
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES CURRENT_COUNT_STAR_ENDED +2
+
+          // New NULL key.
+          currKeyIsNull = true;
+          count = 1;
+        }
+
+      } else {
+
+        if (currKeyIsNull) {
+
+          // Current NULL key ended.
+          currKeyIsNull = false;
+
+          haveNullKey = true;
+          nullKeyCount += count;
+
+          // New non-NULL key.
+#USE_LINES NEW_CURRENT_KEY +2
+
+          count = 1;
+#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT +2
+
+          count++;
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES CURRENT_COUNT_STAR_ENDED +2
+
+          // New non-NULL key.
+#USE_LINES NEW_CURRENT_KEY +2
+
+          count = 1;
+        }
+      }
+    }
+    // Handle last key.
+#USE_LINES LAST_NULLS_COUNT_STAR
+  }
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountStarOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountStarOperator.txt
new file mode 100644
index 0000000..b98bae7
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashMultiKeySingleCountStarOperator.txt
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.singlecount.VectorGroupByHashMultiKeySingleCountTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hive.common.util.HashCodeUtil;
+
+#USE_LINES KEY_VARIATION_OPERATOR_IMPORTS
+
+/*
+ * Specialized class for doing a multi-key COUNT(*) Native Vectorized GroupBy.
+ *
+ * (For more comments see GroupByHashSingleKeySingleCountStarOperator.txt).
+ */
+public class VectorGroupByHashMultiKeySingleCountStarOperator
+    extends VectorGroupByHashMultiKeySingleCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int countColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient boolean haveNullKey;
+
+  protected transient long nullKeyCount;
+
+#USE_LINES KEY_VARIATION_TRANSIENT
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashMultiKeySingleCountStarOperator() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeySingleCountStarOperator(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    countColumnNum = singleCountAggregation.getCountColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+#USE_LINES MULTI_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+    nullKeyCount = 0;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
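The pair of INCLUDE lines below stamps the same main loop out twice, and doMainLoop picks a variant from batch.selectedInUse. Hand-expanded, the generated handleLogical and handlePhysical methods differ only in how batchIndex is derived; everything spliced in by #USE_LINES is elided here as a comment (a sketch, not the generated source):

    // LOGICAL pass: rows were filtered out, so map each logical index through
    // batch.selected to the surviving physical row.
    private void handleLogical(VectorizedRowBatch batch, final int inputLogicalSize)
        throws HiveException, IOException {
      int[] selected = batch.selected;
      for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
        final int batchIndex = selected[logicalIndex];
        // ... next-key / count logic spliced in by #USE_LINES ...
      }
    }

    // PHYSICAL pass: nothing was filtered, so the logical index is already the
    // physical batch index and the selected[] indirection disappears.
    private void handlePhysical(VectorizedRowBatch batch, final int inputLogicalSize)
        throws HiveException, IOException {
      for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
        // ... same per-row logic ...
      }
    }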
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashMultiKeyCommonLines
+#INCLUDE GroupByHashSingleCountStarCommonLines
+
+#INCLUDE GroupByHashMultiKeySingleCountStarInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical"
+
+#INCLUDE GroupByHashMultiKeySingleCountStarInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical"
+
+  @Override
+  protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    if (batch.selectedInUse) {
+      handleLogical(batch, inputLogicalSize);
+    } else {
+      handlePhysical(batch, inputLogicalSize);
+    }
+  }
+
+  /**
+   * Flush all of the key and count pairs of the multi-key hash table to the
+   * output.
+   */
+  @Override
+  protected void outputGroupBy() throws HiveException {
+
+    if (haveNullKey) {
+      outputSingleCountForNullMultiKey(nullKeyCount);
+    }
+
+    doOutputMultiKeyAndCounts();
+  }
+}
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleCountColumnCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleCountColumnCommonLines.txt
new file mode 100644
index 0000000..fda4c8b
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleCountColumnCommonLines.txt
@@ -0,0 +1,193 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to Single COUNT(non-key-column) aggregations.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- find or create the hash table entry and
+#COMMENT add or initialize it with the count. All variations.
+#COMMENT
+#BEGIN_LINES CURRENT_COLUMN_COUNT_KEY_ENDED
+#IF LONG_KEY
+          findOrCreateLongZeroCountKey(
+              currentKey,
+              HashCodeUtil.calculateLongHashCode(currentKey),
+              count);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+          findOrCreateBytesKey(
+              currentKey, currentKeyStart, currentKeyLength,
+              HashCodeUtil.calculateBytesHashCode(
+                  currentKey, currentKeyStart, currentKeyLength),
+              count);
+#ENDIF STRING_KEY
+#IF SERIALIZE_KEY||MULTI_KEY
+          findOrCreateBytesKey(
+              currentKey, 0, currentKeyLength,
+              HashCodeUtil.calculateBytesHashCode(
+                  currentKey, 0, currentKeyLength),
+              count);
+#ENDIF SERIALIZE_KEY||MULTI_KEY
+#END_LINES
+#COMMENT
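Every block in this file is spliced in at a point where a series of equal keys has just ended, so the hash table is probed once per distinct run of keys instead of once per row. A self-contained sketch of why that matters for clustered input (long keys, NULLs ignored, hypothetical method name):

    // Counts the hash-table find/create operations the run-detection loop performs.
    static long hashTableOps(long[] keys) {
      long ops = 0;
      long currentKey = keys[0];
      for (int i = 1; i < keys.length; i++) {
        if (keys[i] != currentKey) {
          ops++;                   // current series of equal keys ended
          currentKey = keys[i];    // start a new series
        }
      }
      return ops + 1;              // the "handle last key" step
    }

    // hashTableOps(new long[] {5, 5, 5, 9, 9}) == 2 operations for 5 rows.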
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- create the hash table entry if necessary;
+#COMMENT ignore it if it is already present since the count is 0 in this case. All variations.
+#COMMENT
+#BEGIN_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT
+#IF LONG_KEY
+          findOrCreateLongZeroCountKey(
+              currentKey,
+              HashCodeUtil.calculateLongHashCode(currentKey),
+              0);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+          findOrCreateBytesKey(
+              currentKey, currentKeyStart, currentKeyLength,
+              HashCodeUtil.calculateBytesHashCode(
+                  currentKey, currentKeyStart, currentKeyLength),
+              0);
+#ENDIF STRING_KEY
+#IF SERIALIZE_KEY||MULTI_KEY
+          findOrCreateBytesKey(
+              currentKey, 0, currentKeyLength,
+              HashCodeUtil.calculateBytesHashCode(
+                  currentKey, 0, currentKeyLength),
+              0);
+#ENDIF SERIALIZE_KEY||MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT After the key processing loop for a batch of no NULL keys, find or create the hash table
+#COMMENT entry and add or initialize it with the count. All variations.
+#COMMENT
+#BEGIN_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY
+#IF LONG_KEY
+    findOrCreateLongZeroCountKey(
+        currentKey,
+        HashCodeUtil.calculateLongHashCode(currentKey),
+        count);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    findOrCreateBytesKey(
+        currentKey, currentKeyStart, currentKeyLength,
+        HashCodeUtil.calculateBytesHashCode(
+            currentKey, currentKeyStart, currentKeyLength),
+        count);
+#ENDIF STRING_KEY
+#IF SERIALIZE_KEY
+    findOrCreateBytesKey(
+        currentKey, 0, currentKeyLength,
+        HashCodeUtil.calculateBytesHashCode(
+            currentKey, 0, currentKeyLength),
+        count);
+#ENDIF SERIALIZE_KEY
+#IF MULTI_KEY
+    Not applicable
+#ENDIF MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT After the key processing loop for a batch which may have NULL keys, find or create the
+#COMMENT hash table entry and add or initialize it with the count. All variations.
+#COMMENT
+#BEGIN_LINES LAST_NULLS_COLUMN_COUNT_KEY
+    if (currKeyIsNull) {
+      haveNullKey = true;
+      nullKeyCount += count;
+    } else {
+#IF LONG_KEY
+      findOrCreateLongZeroCountKey(
+          currentKey,
+          HashCodeUtil.calculateLongHashCode(currentKey),
+          count);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      findOrCreateBytesKey(
+          currentKey, currentKeyStart, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, currentKeyStart, currentKeyLength),
+          count);
+#ENDIF STRING_KEY
+#IF SERIALIZE_KEY||MULTI_KEY
+      findOrCreateBytesKey(
+          currentKey, 0, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, 0, currentKeyLength),
+          count);
+#ENDIF SERIALIZE_KEY||MULTI_KEY
+    }
+#END_LINES
+#COMMENT
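Semantically, the COUNT(non-key-column) blocks reduce to the following plain-Java model, with java.util.HashMap standing in for the specialized hash table (a sketch only; NULL group-by keys are omitted for brevity):

    // COUNT(col) per key: a NULL value of the counted column adds 0, but the
    // merge(..., 0L, ...) call still creates the group's entry -- which is why
    // the ZERO_COUNT variants must find-or-create even with nothing to add.
    static java.util.Map<Long, Long> countColumn(Long[] keys, Long[] values) {
      java.util.Map<Long, Long> counts = new java.util.HashMap<>();
      for (int i = 0; i < keys.length; i++) {
        counts.merge(keys[i], values[i] == null ? 0L : 1L, Long::sum);
      }
      return counts;
    }

    // countColumn(new Long[] {1L, 1L, 2L}, new Long[] {null, null, null})
    // yields {1=0, 2=0}: every group survives, each with count 0.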
+#COMMENT *******************************************************************************************
+#COMMENT After the key processing loop for a batch of no NULL keys, create the hash table entry
+#COMMENT if necessary; ignore it if it is already present since the count is 0 in this case.
+#COMMENT All variations.
+#COMMENT
+#BEGIN_LINES LAST_NO_NULLS_KEY_COLUMN_COUNT
+#IF LONG_KEY
+    findOrCreateLongZeroCountKey(
+        currentKey,
+        HashCodeUtil.calculateLongHashCode(currentKey),
+        0);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    findOrCreateBytesKey(
+        currentKey, currentKeyStart, currentKeyLength,
+        HashCodeUtil.calculateBytesHashCode(
+            currentKey, currentKeyStart, currentKeyLength),
+        0);
+#ENDIF STRING_KEY
+#IF SERIALIZE_KEY
+    findOrCreateBytesKey(
+        currentKey, 0, currentKeyLength,
+        HashCodeUtil.calculateBytesHashCode(
+            currentKey, 0, currentKeyLength),
+        0);
+#ENDIF SERIALIZE_KEY
+#IF MULTI_KEY
+    Not applicable
+#ENDIF MULTI_KEY
+#END_LINES
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT After the key processing loop for a batch which may have NULL keys, create the hash table
+#COMMENT entry if necessary; ignore it if it is already present since the count is 0 in this case.
+#COMMENT All variations.
+#COMMENT
+#BEGIN_LINES LAST_NULLS_KEY_COLUMN_COUNT
+    if (currKeyIsNull) {
+      haveNullKey = true;
+    } else {
+#IF LONG_KEY
+      findOrCreateLongZeroCountKey(
+          currentKey,
+          HashCodeUtil.calculateLongHashCode(currentKey),
+          0);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+      findOrCreateBytesKey(
+          currentKey, currentKeyStart, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, currentKeyStart, currentKeyLength),
+          0);
+#ENDIF STRING_KEY
+#IF SERIALIZE_KEY||MULTI_KEY
+      findOrCreateBytesKey(
+          currentKey, 0, currentKeyLength,
+          HashCodeUtil.calculateBytesHashCode(
+              currentKey, 0, currentKeyLength),
+          0);
+#ENDIF SERIALIZE_KEY||MULTI_KEY
+#END_LINES
+#COMMENT===========================================================================================
+#COMMENT
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleCountKeyCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleCountKeyCommonLines.txt
new file mode 100644
index 0000000..ea42f21
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleCountKeyCommonLines.txt
@@ -0,0 +1,96 @@
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#COMMENT
+#COMMENT THIS FILE: Common to Single COUNT(key-column) aggregation.
+#COMMENT
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT *******************************************************************************************
+#COMMENT The current series of equal keys ended -- find or create the hash table entry and
+#COMMENT add or initialize it with the count. All variations.
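For COUNT(key-column), the rule the blocks below encode is the inverse of COUNT(*): a NULL key still yields an output row, but its count is pinned at 0, because COUNT over a NULL expression never increments. A plain-Java model of that rule (HashMap stand-in; a sketch, not the real slot table):

    // COUNT(key) per key: remember that a NULL key occurred so a NULL output
    // row exists, but never add to its count.
    static java.util.Map<Long, Long> countKey(Long[] keys) {
      java.util.Map<Long, Long> counts = new java.util.HashMap<>();
      boolean haveNullKey = false;
      for (Long key : keys) {
        if (key == null) {
          haveNullKey = true;               // note it; the count stays 0
        } else {
          counts.merge(key, 1L, Long::sum);
        }
      }
      if (haveNullKey) {
        counts.put(null, 0L);               // java.util.HashMap permits a null key
      }
      return counts;
    }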
+#COMMENT +#BEGIN_LINES CURRENT_COUNT_KEY_ENDED +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch of no NULL keys, find or create the hash table +#COMMENT entry and add or initialize it with the count. All variations. +#COMMENT +#BEGIN_LINES LAST_NO_NULLS_COUNT_KEY +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch which may have NULL keys, find or create the +#COMMENT hash table entry and add or initialize it with the count. All variations. +#COMMENT +#BEGIN_LINES LAST_NULLS_COUNT_KEY + if (!currKeyIsNull) { +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY||MULTI_KEY + } +#END_LINES \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleCountStarCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleCountStarCommonLines.txt new file mode 100644 index 0000000..d72beb6 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleCountStarCommonLines.txt @@ -0,0 +1,101 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to Single COUNT(*) aggregation. 
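COUNT(*) is the one single-count variation where NULL-key rows are themselves counted: the loops accumulate nullKeyCount and the operators hand it to outputSingleCountForNullMultiKey. The same plain-Java modeling as the earlier sketches (HashMap stand-in; a sketch):

    // COUNT(*) per key: every row counts, including rows whose key is NULL.
    static java.util.Map<Long, Long> countStar(Long[] keys) {
      java.util.Map<Long, Long> counts = new java.util.HashMap<>();
      boolean haveNullKey = false;
      long nullKeyCount = 0;
      for (Long key : keys) {
        if (key == null) {
          haveNullKey = true;
          nullKeyCount++;                   // NULL-key rows still count
        } else {
          counts.merge(key, 1L, Long::sum);
        }
      }
      if (haveNullKey) {
        counts.put(null, nullKeyCount);
      }
      return counts;
    }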
+#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT The current series of equal keys ended -- find or create the hash table entry and +#COMMENT add or initialize it with the count. All variations. +#COMMENT +#BEGIN_LINES CURRENT_COUNT_STAR_ENDED +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch of no NULL keys, find or create the hash table +#COMMENT entry and add or initialize it with the count. All variations. +#COMMENT +#BEGIN_LINES LAST_NO_NULLS_COUNT_STAR +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY||MULTI_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT After the key processing loop for a batch which may have NULL keys, find or create the +#COMMENT hash table entry and add or initialize it with the count. All variations. 
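Run side by side, the countStar and countKey sketches above make the difference between the LAST_NULLS_COUNT_STAR and LAST_NULLS_COUNT_KEY blocks concrete:

    Long[] keys = {7L, null, 7L, null};
    System.out.println(countStar(keys));   // {null=2, 7=2} -- every row counted
    System.out.println(countKey(keys));    // {null=0, 7=2} -- NULL keys never counted
    // (HashMap iteration order may vary.)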
+#COMMENT +#BEGIN_LINES LAST_NULLS_COUNT_STAR + if (currKeyIsNull) { + haveNullKey = true; + nullKeyCount += count; + } else { +#IF LONG_KEY + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); +#ENDIF LONG_KEY +#IF STRING_KEY + findOrCreateBytesKey( + currentKey, currentKeyStart, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, currentKeyStart, currentKeyLength), + count); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY||MULTI_KEY + findOrCreateBytesKey( + currentKey, 0, currentKeyLength, + HashCodeUtil.calculateBytesHashCode( + currentKey, 0, currentKeyLength), + count); +#ENDIF SERIALIZE_KEY||MULTI_KEY + } +#END_LINES +#COMMENT=========================================================================================== +#COMMENT \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt new file mode 100644 index 0000000..0d8a345 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyCommonLines.txt @@ -0,0 +1,42 @@ +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#COMMENT +#COMMENT THIS FILE: Common to Single Key variations. +#COMMENT +#COMMENT +#COMMENT=========================================================================================== +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific ColumnVector import code lines. +#COMMENT +#BEGIN_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +#IF STRING_KEY +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +#ENDIF STRING_KEY +#END_LINES +#COMMENT +#COMMENT ******************************************************************************************* +#COMMENT Any single key variation specific initializeOp code lines. +#COMMENT +#BEGIN_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP +#IF SERIALIZE_KEY + + keyVectorSerializeWrite = + new VectorSerializeRow( + new BinarySortableSerializeWrite(1)); + TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() }; + int[] columnMap = new int[] { groupByKeyExpressions[0].getOutputColumnNum() }; + keyVectorSerializeWrite.init(typeInfos, columnMap); + + currentKeyOutput = new Output(); + nextKeyOutput = new Output(); +#ENDIF SERIALIZE_KEY +#END_LINES diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDecimal64Operator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDecimal64Operator.txt new file mode 100644 index 0000000..7692f67 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDecimal64Operator.txt @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a DECIMAL_64 Native Vectorized GroupBy. + */ +public class + extends VectorGroupByHashLong { + + private static final long serialVersionUID = 1L; + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionInclude.txt new file mode 100644 index 0000000..05a2a99 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionInclude.txt @@ -0,0 +1,128 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeyDuplicateReductionOperator. +#COMMENT +#COMMENT + /* + * batch processing for NO NULLS key case. + * + * Do find/create on each key. + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current key ended. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_DUPLICATE_REDUCTION_KEY + } + + /* + * batch processing for NULLS key case. + * + * For all NULL keys cases we note NULL key exists since we don't represent it in the slot table. + * + * Do find/create on each non-NULL key. 
+ */ + private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + if (currKeyIsNull) { + + // We note we encountered a NULL key. + haveNullKey = true; + } + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // NULL key series. + } else { + + // Current non-NULL key ended by NULL key. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + + // We note we encountered a NULL key. + haveNullKey = true; + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + // Equal key series. + } else { + + // Current non-NULL key ended by another non-NULL key. +#USE_LINES CURRENT_DUPLICATE_REDUCTION_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_DUPLICATE_REDUCTION_KEY + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt new file mode 100644 index 0000000..77e6312 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeyDuplicateReductionOperator.txt @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.duplicatereduction.VectorGroupByHashKeyDuplicateReductionTable; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a single key Native Vectorized GroupBy with no aggregation. + * + * It is used on a single key for duplicate reduction. + * + * Final duplicate elimination must be done in reduce-shuffle and a reducer since with hash table + * overflow some duplicates can slip through. And, of course, other vertices may contribute + * the same keys. + */ +public class + extends VectorGroupByHashKeyDuplicateReductionTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + +#USE_LINES KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashDuplicateReductionCommonLines + + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For the all NULL or all 0 keys case we note NULL/0 key exists. Otherwise, we do the + * find/create. 
+ */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + if (repeatingKey == 0) { + + // We don't store 0 in the slot table so it can be used to indicate an empty slot. + haveZeroKey = true; + } else { + findOrCreateLongDuplicateReductionKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey)); + } +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesDuplicateReductionKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength)); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesDuplicateReductionKey( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength)); +#ENDIF SERIALIZE_KEY + } else { + + // We note we encountered a repeating NULL key. + haveNullKey = true; + } + } + +#INCLUDE GroupByHashSingleKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeyDuplicateReductionInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + ColumnVector keyColumnVector = outputBatch.cols[0]; + + if (haveNullKey) { + + // NULL entry to deal with. + + // Is the outputBatch already full? 
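+      // (If it is, forward it downstream first so the NULL pseudo-entry below gets a fresh slot.)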
+ if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + outputBatch.size++; + } + +#IF LONG_KEY + doOutputLongKeys((LongColumnVector) keyColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeys((BytesColumnVector) keyColumnVector); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + doOutputSerializeKeys(keyColumnVector); +#ENDIF SERIALIZE_KEY + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountColumnInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountColumnInclude.txt new file mode 100644 index 0000000..31eaa14 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountColumnInclude.txt @@ -0,0 +1,498 @@ +#COMMENT +#COMMENT +#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE +#COMMENT into GroupByHashSingleKeySingleCountColumnOperator. +#COMMENT +#COMMENT + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleNoNullsKey. + * + * Look for sequences of equal keys and determine their count. + */ + private void doNoNullsKeyNoNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNoNullsKey. + * + * Scan for sequences of equal keys. The column count is simply 0 because of all NULL values -- + * but we still must create an entry in the slot table. + */ + private void doNoNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + // No counting. 
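+        // (The repeating non-key column is all NULL here, so an equal-key run only extends
+        // the current key series; its count stays 0.)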
+ } else { + + // Current key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New current key. +#USE_LINES NEW_CURRENT_KEY + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_KEY_COLUMN_COUNT + } + + /* + * Do the NO REPEATING NULLS case for handleNoNullsKey. + * + * Look for sequence of equal keys -- look over at the non-key-column and count non-null rows. + * Even when the non-NULL row count is 0, we still must create an entry in the slot table. + */ + private void doNoNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + int count = (nonKeyIsNull[firstBatchIndex] ? 0 : 1); + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + int count = (nonKeyIsNull[0] ? 0 : 1); + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES GET_NEXT_KEY +#USE_LINES IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New current key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + // Handle last key. +#USE_LINES LAST_NO_NULLS_COLUMN_COUNT_KEY + } + + /* + * <OrPhysical> batch processing (i.e. selectedInUse is true since rows were filtered out) for + * NO NULLS key case. + * + * In general, loop over key column and process the keys. Look for sequences of equal keys. And, + * at the same time do any processing for the non-key-column counting. + * + * Here are the cases: + * + * 1) When non-key-column {REPEATING|NO REPEATING} NO NULLS, look for sequences of equal keys + * and determine their count. + * + * 2) When non-key-column REPEATING NULLS, scan for sequences of equal keys. The column count + * is simply 0 because of all NULL values -- but we still must create an entry in the + * slot table. + * + * 3) Otherwise, non-key-column NO REPEATING NULLS, as we are looking for sequence of + * equal keys -- look over at the non-key-column and count non-null rows. Even when the + * non-null row count is 0, we still must create an entry in the slot table. + * + */ + private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doNoNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // REPEATING NO NULLS + doNoNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS. 
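+      // Case 3: walk the non-key isNull array alongside the key series and count only
+      // non-NULL rows; a count of 0 still creates a slot table entry.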
+ + doNoNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector); + + } + } + + /* + * Do the non-key-column {REPEATING|NO REPEATING} NO NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyNoNullsColumn). + */ + private void doNullsKeyNoNullsColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + + int count = 1; + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = 1; + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COLUMN_COUNT_KEY + } + + /* + * Do the non-key-column REPEATING NULLS case for handleNullsKey. + * + * (For remaining comments see doNoNullsKeyRepeatingNullColumn). + */ + private void doNullsKeyRepeatingNullColumn(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + + // This loop basically does any needed key creation since the non-key count is 0 because + // repeating non-key NULL. + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // No counting. + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New NULL key. + currKeyIsNull = true; + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + // No counting + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED_ZERO_COUNT + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_KEY_COLUMN_COUNT + } + } + + /* + * Do the non-key-column NO REPEATING NULLS case for handleNullsKey. 
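+   *
+   * A small worked example (illustrative): keys {1, 1, NULL, 2} with non-key values
+   * {5, NULL, 7, NULL} produce key 1 with count 1, nullKeyCount incremented by 1, and
+   * key 2 with count 0.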
+ * + * (For remaining comments see doNoNullsKeyNullsColumn). + */ + private void doNullsKeyNullsColumn(VectorizedRowBatch batch, + final int inputLogicalSize, keyColVector, ColumnVector nonKeyColVector) + throws HiveException, IOException { +#IF LOGICAL_BATCH_PROCESSING + + int[] selected = batch.selected; +#ENDIF LOGICAL_BATCH_PROCESSING + +#USE_LINES KEY_VECTOR_VARIABLES + + boolean[] nonKeyIsNull = nonKeyColVector.isNull; + +#IF LOGICAL_BATCH_PROCESSING +#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES +#ELSE +#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES +#ENDIF LOGICAL_BATCH_PROCESSING + +#IF LOGICAL_BATCH_PROCESSING + int count = (nonKeyIsNull[firstBatchIndex] ? 0 : 1); + + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + int count = (nonKeyIsNull[0] ? 0 : 1); + + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New NULL key. + currKeyIsNull = true; + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + + } else { + +#USE_LINES GET_NEXT_KEY + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT + + count += (nonKeyIsNull[batchIndex] ? 0 : 1); + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COLUMN_COUNT_KEY_ENDED + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY + + count = (nonKeyIsNull[batchIndex] ? 0 : 1); + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COLUMN_COUNT_KEY + } + + /* + * batch processing for NULLS key case. + * + * Both NULL and non-NULL keys will have counts for non-key-columns. + * + * In general, loop over key column and process the keys. Look for sequences of NULL keys or + * equal keys. And, at the same time do any processing for the non-key-column counting. + * + * (See the non-key column case comments for handleNoNullsKey). + * + * In all cases above, when its a NULL key, do NULL entry processing. + * + */ + private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + ColumnVector nonKeyColVector = batch.cols[countColumnNum]; + + if (nonKeyColVector.noNulls) { + + // NOTE: This may or may not have nonKeyColVector.isRepeating == true. + // Non-Key: {REPEATING|NO REPEATING} NO NULLS + + doNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } else if (nonKeyColVector.isRepeating) { + + // Non-Key: REPEATING, NULLS Possible. + + if (nonKeyColVector.isNull[0]) { + + // NULL repeating non-key column. + doNullsKeyRepeatingNullColumn(batch, inputLogicalSize, keyColVector); + + } else { + + // Non-NULL repeating non-key column. + doNullsKeyNoNullsColumn(batch, inputLogicalSize, keyColVector); + + } + } else { + + // Non-Key: NOT REPEATING, NULLS Possible. 
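+      // Walk both isNull arrays: NULL-key runs accumulate their non-NULL column count into
+      // nullKeyCount, while non-NULL key runs do the usual find/create with their count.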
+
+      doNullsKeyNullsColumn(batch, inputLogicalSize, keyColVector, nonKeyColVector);
+
+    }
+  }
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountColumnOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountColumnOperator.txt
new file mode 100644
index 0000000..3da154d
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountColumnOperator.txt
@@ -0,0 +1,308 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.singlecount.VectorGroupByHashKeySingleCountTable;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
+import org.apache.hive.common.util.HashCodeUtil;
+
+#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS
+#USE_LINES KEY_VARIATION_OPERATOR_IMPORTS
+
+/*
+ * Specialized class for doing a single key COUNT(non-key-column) Native Vectorized GroupBy.
+ * That is, the grouping is being done on a single long key and
+ * the counting is for another ("non-key") column (which can be any data type).
+ *
+ * We make a single pass. We loop over the key column and process the keys. We look for
+ * sequences of NULL keys or equal keys. And, at the same time, we do any processing needed
+ * for the non-key-column counting.
+ *
+ * NOTE: Both NULL and non-NULL keys have counts for non-key-columns. So, after counting the
+ * non-NULL fields for the non-key-column, we always do a hash table find/create even when the count
+ * is 0 since all those keys must be part of the output result.
+
+   // A key will get created even when there are no non-NULL column values. Count includes 0.
+
+   findOrCreateLongZeroCountKey(
+       key,
+       longKeySeries.currentHashCode,
+       nonNullCount);
+
+ */
+public class
+    extends VectorGroupByHashKeySingleCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  // Non-transient members initialized by the constructor. They cannot be final due to Kryo.
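+  // (Kryo re-creates the operator through the no-argument constructor and then restores the
+  // serialized fields afterwards, which is why these members cannot be final.)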
+
+  protected int countColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient boolean haveNullKey;
+
+  protected transient long nullKeyCount;
+
+#USE_LINES KEY_VARIATION_TRANSIENT
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public () {
+    super();
+  }
+
+  public (CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    countColumnNum = singleCountAggregation.getCountColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+    nullKeyCount = 0;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashSingleKeyCommonLines
+#INCLUDE GroupByHashSingleCountColumnCommonLines
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+  /*
+   * Repeating key case -- it is either ALL NULL keys or ALL the same non-NULL key.
+   *
+   * First, we determine the number of non-NULL values in the non-key column.
+   * Then, whether ALL NULL keys or ALL the same non-NULL key, we create the key if necessary and
+   * include the new count.
+   *
+   * A NULL key is not in the slot table. It is separately represented by the members haveNullKey
+   * and nullKeyCount.
+   *
+   */
+  private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize,
+       keyColVector)
+          throws HiveException, IOException {
+
+    /*
+     * First, determine the count of the non-key column for the whole batch which is covered by the
+     * repeating key.
+     */
+    ColumnVector nonKeyColVector = batch.cols[countColumnNum];
+    int nonKeyNonNullCount;
+    if (nonKeyColVector.noNulls) {
+
+      // NOTE: This may or may not have nonKeyColVector.isRepeating == true.
+      // Non-Key: [REPEATING,] NO NULLS
+      nonKeyNonNullCount = inputLogicalSize;
+
+    } else if (nonKeyColVector.isRepeating) {
+
+      // Non-Key: REPEATING, NULLS Possible
+      nonKeyNonNullCount = (nonKeyColVector.isNull[0] ? 0 : inputLogicalSize);
+
+    } else {
+
+      // Non-Key: NOT REPEATING, NULLS Possible.
+      boolean[] nonKeyIsNull = nonKeyColVector.isNull;
+      nonKeyNonNullCount = 0;
+      if (batch.selectedInUse) {
+
+        int[] selected = batch.selected;
+
+        // Start at 0 and count the rows that are NOT null -- every selected row is covered
+        // by the repeating key.
+        for (int logicalIndex = 0; logicalIndex < inputLogicalSize; logicalIndex++) {
+          final int batchIndex = selected[logicalIndex];
+          if (!nonKeyIsNull[batchIndex]) {
+            nonKeyNonNullCount++;
+          }
+        }
+      } else {
+        for (int batchIndex = 0; batchIndex < inputLogicalSize; batchIndex++) {
+          if (!nonKeyIsNull[batchIndex]) {
+            nonKeyNonNullCount++;
+          }
+        }
+      }
+    }
+
+    /*
+     * Finally, use the non-key non-NULL count for our repeated non-NULL or NULL keys.
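+     *
+     * For example (illustrative, long-key variation): a 5-row batch repeating key 7 whose
+     * non-key column holds {3, NULL, 8, NULL, 1} yields nonKeyNonNullCount == 3, i.e. a single
+     * findOrCreateLongZeroCountKey(7, hashCode(7), 3) call below.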
+ */ + if (keyColVector.noNulls || !keyColVector.isNull[0]) { + + // Non-NULL key. +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongZeroCountKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + nonKeyNonNullCount); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + nonKeyNonNullCount); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKey( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + nonKeyNonNullCount); +#ENDIF SERIALIZE_KEY + } else { + + // All NULL keys. Since we are counting a non-Key column, we must count it under the NULL + // pseudo-entry. + haveNullKey = true; + nullKeyCount += nonKeyNonNullCount; + + } + } + +#INCLUDE GroupByHashSingleKeySingleCountColumnInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeySingleCountColumnInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. 
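+    // (Column 0 of outputBatch carries the key; column 1 carries the COUNT value filled in below.)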
+
+    ColumnVector keyColumnVector = outputBatch.cols[0];
+
+    LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[1];
+
+    if (haveNullKey) {
+      outputSingleCountForNullSingleKey(
+          keyColumnVector, countColumnVector, nullKeyCount);
+    }
+
+#IF LONG_KEY
+    outputLongZeroCountKeyAndCountPairs(
+        (LongColumnVector) keyColumnVector, countColumnVector);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    doOutputStringKeyAndCountPairs(
+        (BytesColumnVector) keyColumnVector, countColumnVector);
+#ENDIF STRING_KEY
+#IF SERIALIZE_KEY
+    doOutputSerializeKeyAndCountPairs(
+        keyColumnVector, countColumnVector);
+#ENDIF SERIALIZE_KEY
+  }
+}
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountKeyInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountKeyInclude.txt
new file mode 100644
index 0000000..e3ce977
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountKeyInclude.txt
@@ -0,0 +1,143 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashSingleKeySingleCountKeyOperator.
+#COMMENT
+#COMMENT
+  /*
+   * batch processing (i.e. selectedInUse is true since rows were filtered out) for the
+   * NO NULLS key case.
+   *
+   * Do a find/create on each key with its count.
+   */
+  private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize,
+       keyColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+    int count = 1;
+
+#IF LOGICAL_BATCH_PROCESSING
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES GET_NEXT_KEY
+#USE_LINES IF_NEXT_EQUALS_CURRENT
+
+        count++;
+      } else {
+
+        // Current key ended.
+#USE_LINES CURRENT_COUNT_KEY_ENDED
+
+        // New current key.
+#USE_LINES NEW_CURRENT_KEY
+
+        count = 1;
+      }
+    }
+    // Handle last key.
+#USE_LINES LAST_NO_NULLS_COUNT_KEY
+  }
+
+  /*
+   * batch processing (i.e. selectedInUse is true since rows were filtered out) for the
+   * NULLS key case.
+   *
+   * For all-NULL-keys cases we note that a NULL key exists but leave its count as 0.
+   *
+   * Do a find/create on each non-NULL key with its count.
+   */
+  private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize,
+       keyColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+    int count;
+    if (currKeyIsNull) {
+      count = 0;
+
+      // We note we encountered a NULL key. But there will be no count for it -- just NULL.
+ haveNullKey = true; + } else { + count = 1; + } + +#IF LOGICAL_BATCH_PROCESSING + for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; +#ELSE + for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) { +#ENDIF LOGICAL_BATCH_PROCESSING + + if (keyIsNull[batchIndex]) { + + if (currKeyIsNull) { + + // We don't count NULLs for NULL key. + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED +2 + + // New NULL key. + currKeyIsNull = true; + count = 0; + + // We note we encountered a NULL key. But there will be no count for it -- just NULL. + haveNullKey = true; + } + + } else { + +#USE_LINES GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. We don't count NULLs for NULL key. + currKeyIsNull = false; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_KEY_ENDED +2 + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COUNT_KEY + } diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountKeyOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountKeyOperator.txt new file mode 100644 index 0000000..64c9943 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountKeyOperator.txt @@ -0,0 +1,241 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.singlecount.VectorGroupByHashKeySingleCountTable; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a single key COUNT(key-column) Native Vectorized GroupBy. 
That is,
+ * the grouping is being done on one long key and we are counting it.
+ *
+ * The NULL key is not represented in the hash table. We handle it as a special case. So,
+ * the find/create call for non-NULL keys looks like this:
+
+   findOrCreateLongNonZeroCountKey(
+       currentKey,
+       HashCodeUtil.calculateLongHashCode(currentKey),
+       count);
+
+ */
+public class
+    extends VectorGroupByHashKeySingleCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  // Non-transient members initialized by the constructor. They cannot be final due to Kryo.
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient boolean haveNullKey;
+
+#USE_LINES KEY_VARIATION_TRANSIENT
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public () {
+    super();
+  }
+
+  public (CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP
+  }
+
+  @Override
+  public void allocateHashTable() throws HiveException {
+    super.allocateHashTable();
+
+    haveNullKey = false;
+  }
+
+#COMMENT===========================================================================================
+#COMMENT
+#COMMENT These code line snippets are intended to:
+#COMMENT 1) Reduce code duplication
+#COMMENT 2) To not incur the cost of calling methods or having abstract objects
+#COMMENT 3) And, to not have to attempt to parameterize for methods that involve simple locals
+#COMMENT 4) Separate the key variation variables and logic from the common loop logic.
+#COMMENT
+#INCLUDE GroupByHashCommonLines
+#INCLUDE GroupByHashSingleKeyCommonLines
+#INCLUDE GroupByHashSingleCountKeyCommonLines
+#COMMENT
+#COMMENT===========================================================================================
+#COMMENT
+  /*
+   * Repeating key case -- either all NULL keys or all the same non-NULL key.
+   *
+   * For the all-NULL-keys case we note that the NULL key exists but leave its count as 0.
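+   *
+   * E.g. (illustrative): a repeating batch of 100 NULL keys produces the output row (NULL, 0),
+   * while a repeating batch of 100 keys equal to 42 produces (42, 100) via
+   * findOrCreateLongNonZeroCountKey(42, hashCode(42), 100) in the long-key variation.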
+ */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongNonZeroCountKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + inputLogicalSize); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKey( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + inputLogicalSize); +#ENDIF SERIALIZE_KEY + } else { + + // We note we encountered a repeating NULL key. But there will be no count for it -- + // just NULL. + haveNullKey = true; + } + } + +#INCLUDE GroupByHashSingleKeySingleCountKeyInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeySingleCountKeyInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. + if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + ColumnVector keyColumnVector = outputBatch.cols[0]; + + LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[1]; + + if (haveNullKey) { + + // COUNT(column) does not maintain a count for NULLs and since we are processing the key + // our count is always 0. 
+ */
+      outputSingleCountForNullSingleKey(
+          keyColumnVector, countColumnVector, /* nullKeyCount */ 0);
+    }
+
+#IF LONG_KEY
+    outputLongNonZeroKeyAndCountPairs(
+        (LongColumnVector) keyColumnVector, countColumnVector);
+#ENDIF LONG_KEY
+#IF STRING_KEY
+    doOutputStringKeyAndCountPairs(
+        (BytesColumnVector) keyColumnVector, countColumnVector);
+#ENDIF STRING_KEY
+#IF SERIALIZE_KEY
+    doOutputSerializeKeyAndCountPairs(
+        keyColumnVector, countColumnVector);
+#ENDIF SERIALIZE_KEY
+  }
+}
\ No newline at end of file
diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountStarInclude.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountStarInclude.txt
new file mode 100644
index 0000000..da00c40
--- /dev/null
+++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountStarInclude.txt
@@ -0,0 +1,134 @@
+#COMMENT
+#COMMENT
+#COMMENT This file is INCLUDE processed TWICE with LOGICAL_BATCH_PROCESSING TRUE and FALSE
+#COMMENT into GroupByHashSingleKeySingleCountStarOperator.
+#COMMENT
+#COMMENT
+  /*
+   * batch processing (i.e. selectedInUse is true since rows were filtered out) for the
+   * NO NULLS key case.
+   *
+   * Do a find/create on each key with its count.
+   */
+  private void handleNoNullsKey(VectorizedRowBatch batch, final int inputLogicalSize,
+       keyColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_NO_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+    int count = 1;
+
+#IF LOGICAL_BATCH_PROCESSING
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES GET_NEXT_KEY
+#USE_LINES IF_NEXT_EQUALS_CURRENT
+
+        count++;
+      } else {
+
+        // Current key ended.
+#USE_LINES CURRENT_COUNT_STAR_ENDED
+
+        // New current key.
+#USE_LINES NEW_CURRENT_KEY
+
+        count = 1;
+      }
+    }
+#USE_LINES LAST_NO_NULLS_COUNT_STAR
+  }
+
+  /*
+   * batch processing (i.e. selectedInUse is true since rows were filtered out) for the
+   * NULLS key case.
+   *
+   * For all-NULL keys we note that the NULL key exists AND count the rows.
+   *
+   * Do a find/create on each non-NULL key with its count.
+   */
+  private void handleNullsKey(VectorizedRowBatch batch, final int inputLogicalSize,
+       keyColVector) throws HiveException, IOException {
+#IF LOGICAL_BATCH_PROCESSING
+
+    int[] selected = batch.selected;
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+#USE_LINES KEY_VECTOR_VARIABLES
+
+#IF LOGICAL_BATCH_PROCESSING
+#USE_LINES LOGICAL_NULLS_CURRENT_KEY_VARIABLES
+#ELSE
+#USE_LINES PHYSICAL_NULLS_CURRENT_KEY_VARIABLES
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+    int count = 1;
+
+#IF LOGICAL_BATCH_PROCESSING
+    for (int logicalIndex = 1; logicalIndex < inputLogicalSize; logicalIndex++) {
+      final int batchIndex = selected[logicalIndex];
+#ELSE
+    for (int batchIndex = 1; batchIndex < inputLogicalSize; batchIndex++) {
+#ENDIF LOGICAL_BATCH_PROCESSING
+
+      if (keyIsNull[batchIndex]) {
+
+        if (currKeyIsNull) {
+
+          count++;
+        } else {
+
+          // Current non-NULL key ended.
+#USE_LINES CURRENT_COUNT_STAR_ENDED +2
+
+          // New NULL key.
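+          // (COUNT(*) counts rows regardless of NULL values, so a new NULL-key series starts
+          // at 1 here, unlike the COUNT(key) variant where it stays 0.)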
+ currKeyIsNull = true; + count = 1; + } + + } else { + +#USE_LINES GET_NEXT_KEY +2 + if (currKeyIsNull) { + + // Current NULL key ended. + currKeyIsNull = false; + + haveNullKey = true; + nullKeyCount += count; + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; +#USE_LINES ELSE_IF_NEXT_EQUALS_CURRENT +2 + + count++; + } else { + + // Current non-NULL key ended. +#USE_LINES CURRENT_COUNT_STAR_ENDED +2 + + // New non-NULL key. +#USE_LINES NEW_CURRENT_KEY +2 + + count = 1; + } + } + } + // Handle last key. +#USE_LINES LAST_NULLS_COUNT_STAR + } \ No newline at end of file diff --git ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountStarOperator.txt ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountStarOperator.txt new file mode 100644 index 0000000..c481b71 --- /dev/null +++ ql/src/gen/vectorization/GroupByOperatorTemplates/GroupByHashSingleKeySingleCountStarOperator.txt @@ -0,0 +1,240 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.key.singlecount.VectorGroupByHashKeySingleCountTable; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hive.common.util.HashCodeUtil; + +#USE_LINES SINGLE_KEY_VARIATION_COLUMN_VECTOR_IMPORTS +#USE_LINES KEY_VARIATION_OPERATOR_IMPORTS + +/* + * Specialized class for doing a single key COUNT(*) Native Vectorized GroupBy that is lookup on + * a single long using a specialized hash map. + * + Count Star + + NULL key has separate counter. + + findOrCreateLongNonZeroCountKey( + currentKey, + HashCodeUtil.calculateLongHashCode(currentKey), + count); + + */ +public class + extends VectorGroupByHashKeySingleCountTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + protected transient boolean haveNullKey; + + protected transient long nullKeyCount; + +#USE_LINES KEY_VARIATION_TRANSIENT + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public () { + super(); + } + + public (CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); +#USE_LINES SINGLE_KEY_VARIATION_INITIALIZE_OP + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveNullKey = false; + nullKeyCount = 0; + } + +#COMMENT=========================================================================================== +#COMMENT +#COMMENT These code line snippets are intended to: +#COMMENT 1) Reduce code duplication +#COMMENT 2) To not incur the cost of calling methods or having abstract objects +#COMMENT 3) And, to not have to attempt parameterize for methods that involve simple locals +#COMMENT 4) Separate the the key variation variables and logic from the common loop logic. +#COMMENT +#INCLUDE GroupByHashCommonLines +#INCLUDE GroupByHashSingleKeyCommonLines +#INCLUDE GroupByHashSingleCountStarCommonLines +#COMMENT + /* + * Repeating key case -- either all NULL keys or all same non-NULL key. + * + * For all NULL keys case we note NULL key exists AND count it. + */ + private void handleRepeatingKey(VectorizedRowBatch batch, final int inputLogicalSize, + keyColVector) throws HiveException, IOException { + + if (keyColVector.noNulls || !keyColVector.isNull[0]) { +#IF LONG_KEY + final long repeatingKey = keyColVector.vector[0]; + findOrCreateLongNonZeroCountKey( + repeatingKey, + HashCodeUtil.calculateLongHashCode(repeatingKey), + inputLogicalSize); +#ENDIF LONG_KEY +#IF STRING_KEY + final byte[] repeatingKey = keyColVector.vector[0]; + final int repeatingKeyStart = keyColVector.start[0]; + final int repeatingKeyLength = keyColVector.length[0]; + findOrCreateBytesKey( + repeatingKey, repeatingKeyStart, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, repeatingKeyStart, repeatingKeyLength), + inputLogicalSize); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, 0); + byte[] repeatingKey = currentKeyOutput.getData(); + int repeatingKeyLength = currentKeyOutput.getLength(); + findOrCreateBytesKey( + repeatingKey, 0, repeatingKeyLength, + HashCodeUtil.calculateBytesHashCode( + repeatingKey, 0, repeatingKeyLength), + inputLogicalSize); +#ENDIF SERIALIZE_KEY + } else { + + // We note we encountered a repeating NULL key. + haveNullKey = true; + nullKeyCount += inputLogicalSize; + } + } + +#INCLUDE GroupByHashSingleKeySingleCountStarInclude LOGICAL_BATCH_PROCESSING=true,="Logical",="logical" + +#INCLUDE GroupByHashSingleKeySingleCountStarInclude LOGICAL_BATCH_PROCESSING=false,="Physical",="physical" + + + @Override + protected void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException { + + keyColVector = () batch.cols[keyColumnNum]; + + // When key is repeated we want to short-circuit and finish quickly so we don't have to + // have special repeated key logic later. 
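+    // (isRepeating means every row shares vector[0], so handleRepeatingKey can do a single
+    // find/create with count == inputLogicalSize.)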
+ if (keyColVector.isRepeating) { + + handleRepeatingKey(batch, inputLogicalSize, keyColVector); + return; + } + + if (batch.selectedInUse) { + + // Map logical to (physical) batch index. + + if (keyColVector.noNulls) { + + // LOGICAL, Key: NO NULLS. + + handleLogicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // LOGICAL, Key: NULLS. + + handleLogicalNullsKey(batch, inputLogicalSize, keyColVector); + } + + } else { + + // NOT selectedInUse. No rows filtered out -- so logical index is the (physical) batch index. + + if (keyColVector.noNulls) { + + // PHYSICAL, Key: NO NULLS. + + handlePhysicalNoNullsKey(batch, inputLogicalSize, keyColVector); + + } else { + + // PHYSICAL, Key: NULLS. + + handlePhysicalNullsKey(batch, inputLogicalSize, keyColVector); + } + } + } + + /** + * Flush all of the key and count pairs of the one long key hash table to the + * output. + */ + @Override + protected void outputGroupBy() throws HiveException { + + // Keys come first in the output. + + ColumnVector keyColumnVector = outputBatch.cols[0]; + + LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[1]; + + if (haveNullKey) { + outputSingleCountForNullSingleKey( + keyColumnVector, countColumnVector, nullKeyCount); + } + +#IF LONG_KEY + outputLongNonZeroKeyAndCountPairs( + (LongColumnVector) keyColumnVector, countColumnVector); +#ENDIF LONG_KEY +#IF STRING_KEY + doOutputStringKeyAndCountPairs( + (BytesColumnVector) keyColumnVector, countColumnVector); +#ENDIF STRING_KEY +#IF SERIALIZE_KEY + doOutputSerializeKeyAndCountPairs( + keyColumnVector, countColumnVector); +#ENDIF SERIALIZE_KEY + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index abbbe9a..42a4dad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -719,17 +719,22 @@ private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc exprDesc, return expr; } - public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List exprNodes) - throws HiveException { - VectorExpression[] vecExprs = - getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); + public static void upConvertDecimal64(VectorExpression[] vecExprs, VectorizationContext vContext) + throws HiveException { final int size = vecExprs.length; for (int i = 0; i < size; i++) { VectorExpression vecExpr = vecExprs[i]; if (vecExpr.getOutputColumnVectorType() == ColumnVector.Type.DECIMAL_64) { - vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr); + vecExprs[i] = wrapWithDecimal64ToDecimalConversion(vecExpr, vContext); } } + } + + public VectorExpression[] getVectorExpressionsUpConvertDecimal64(List exprNodes) + throws HiveException { + VectorExpression[] vecExprs = + getVectorExpressions(exprNodes, VectorExpressionDescriptor.Mode.PROJECTION); + upConvertDecimal64(vecExprs, this); return vecExprs; } @@ -1662,7 +1667,9 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd * The instantiateExpression method sets the output column and type information. 
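 * (As part of this change, instantiateExpression is static; callers now pass the
 * VectorizationContext explicitly so the scratch output column can be allocated from its
 * output column manager.)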
*/ VectorExpression vectorExpression = - instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments); + instantiateExpression( + vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, this, + arguments); if (vectorExpression == null) { handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.DECIMAL_64, arguments); } @@ -1773,8 +1780,9 @@ private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, return createVectorExpression(vclass, childExpr, childrenMode, returnType); } - private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeInfo resultTypeInfo) - throws HiveException { + private static VectorExpression createDecimal64ToDecimalConversion(int colIndex, + TypeInfo resultTypeInfo, VectorizationContext vContext) + throws HiveException { Object [] conversionArgs = new Object[1]; conversionArgs[0] = colIndex; VectorExpression vectorExpression = @@ -1782,6 +1790,7 @@ private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeIn ConvertDecimal64ToDecimal.class, resultTypeInfo, DataTypePhysicalVariation.NONE, + vContext, conversionArgs); if (vectorExpression == null) { handleCouldNotInstantiateVectorExpression( @@ -1795,11 +1804,12 @@ private VectorExpression createDecimal64ToDecimalConversion(int colIndex, TypeIn return vectorExpression; } - public VectorExpression wrapWithDecimal64ToDecimalConversion(VectorExpression inputExpression) + public static VectorExpression wrapWithDecimal64ToDecimalConversion( + VectorExpression inputExpression, VectorizationContext vContext) throws HiveException { VectorExpression wrapExpression = createDecimal64ToDecimalConversion( - inputExpression.getOutputColumnNum(), inputExpression.getOutputTypeInfo()); + inputExpression.getOutputColumnNum(), inputExpression.getOutputTypeInfo(), vContext); if (inputExpression instanceof IdentityExpression) { return wrapExpression; } @@ -1845,11 +1855,14 @@ private VectorExpression createVectorExpression(Class vectorClass, // In this method, we must only process non-Decimal64 column vectors. // Convert Decimal64 columns to regular decimal. - DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex); - if (dataTypePhysicalVariation != null && dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { + DataTypePhysicalVariation dataTypePhysicalVariation = + getDataTypePhysicalVariation(colIndex); + if (dataTypePhysicalVariation != null && + dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { // FUTURE: Can we reuse this conversion? 
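      // (ConvertDecimal64ToDecimal reads the long-backed, scaled Decimal64 column and writes a
      // regular HiveDecimal column so non-Decimal64 expressions can consume it.)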
- VectorExpression vChild = createDecimal64ToDecimalConversion(colIndex, childTypeInfo); + VectorExpression vChild = + createDecimal64ToDecimalConversion(colIndex, childTypeInfo, this); children.add(vChild); arguments[i] = vChild.getOutputColumnNum(); @@ -1878,7 +1891,10 @@ private VectorExpression createVectorExpression(Class vectorClass, throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName()); } } - VectorExpression vectorExpression = instantiateExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments); + VectorExpression vectorExpression = + instantiateExpression( + vectorClass, returnType, DataTypePhysicalVariation.NONE, this, + arguments); if (vectorExpression == null) { handleCouldNotInstantiateVectorExpression(vectorClass, returnType, DataTypePhysicalVariation.NONE, arguments); } @@ -1897,7 +1913,7 @@ private VectorExpression createVectorExpression(Class vectorClass, return vectorExpression; } - private void handleCouldNotInstantiateVectorExpression(Class vectorClass, TypeInfo returnType, + private static void handleCouldNotInstantiateVectorExpression(Class vectorClass, TypeInfo returnType, DataTypePhysicalVariation dataTypePhysicalVariation, Object[] arguments) throws HiveException { String displayString = "Could not instantiate vector expression class " + vectorClass.getName() + " for arguments " + Arrays.toString(arguments) + " return type " + @@ -1912,7 +1928,7 @@ private void handleCouldNotInstantiateVectorExpression(Class vectorClass, Typ return VectorExpressionDescriptor.Mode.PROJECTION; } - private String getNewInstanceArgumentString(Object [] args) { + private static String getNewInstanceArgumentString(Object [] args) { if (args == null) { return "arguments: NULL"; } @@ -1952,8 +1968,9 @@ public static String getStackTraceAsSingleLine(Throwable e) { return cleaned; } - private VectorExpression instantiateExpression(Class vclass, TypeInfo returnTypeInfo, - DataTypePhysicalVariation returnDataTypePhysicalVariation, Object...args) + private static VectorExpression instantiateExpression(Class vclass, TypeInfo returnTypeInfo, + DataTypePhysicalVariation returnDataTypePhysicalVariation, VectorizationContext vContext, + Object...args) throws HiveException { VectorExpression ve = null; Constructor ctor = getConstructor(vclass); @@ -1963,15 +1980,19 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT try { ve = (VectorExpression) ctor.newInstance(); } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + - getStackTraceAsSingleLine(ex)); + throw new HiveException( + "Could not instantiate " + vclass.getSimpleName() + + " with 0 arguments" + + ", exception: " + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength) { try { ve = (VectorExpression) ctor.newInstance(args); } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + - getStackTraceAsSingleLine(ex)); + throw new HiveException( + "Could not instantiate " + vclass.getSimpleName() + + " with " + getNewInstanceArgumentString(args) + + ", exception: " + getStackTraceAsSingleLine(ex)); } } else if (numParams == argsLength + 1) { // Additional argument is needed, which is the outputcolumn. 
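      // (The extra trailing constructor parameter receives the scratch column number allocated
      // from vContext.ocm, as the following hunk shows.)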
@@ -1986,7 +2007,7 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT // Special handling for decimal because decimal types need scale and precision parameter. // This special handling should be avoided by using returnType uniformly for all cases. final int outputColumnNum = - ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation); + vContext.ocm.allocateOutputColumn(returnTypeInfo, returnDataTypePhysicalVariation); newArgs = Arrays.copyOf(args, numParams); newArgs[numParams-1] = outputColumnNum; @@ -2000,8 +2021,10 @@ private VectorExpression instantiateExpression(Class vclass, TypeInfo returnT ve.setOutputDataTypePhysicalVariation(returnDataTypePhysicalVariation); } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + - getStackTraceAsSingleLine(ex)); + throw new HiveException( + "Could not instantiate " + vclass.getSimpleName() + + " with arguments " + getNewInstanceArgumentString(newArgs) + + ", exception: " + getStackTraceAsSingleLine(ex)); } } // Add maxLength parameter to UDFs that have CHAR or VARCHAR output. @@ -2732,32 +2755,44 @@ private VectorExpression getCastToDecimal(List childExpr, TypeInfo // Return a constant vector expression Object constantValue = ((ExprNodeConstantDesc) child).getValue(); HiveDecimal decimalValue = castConstantToDecimal(constantValue, child.getTypeInfo()); - return getConstantVectorExpression(decimalValue, returnType, VectorExpressionDescriptor.Mode.PROJECTION); + return getConstantVectorExpression( + decimalValue, returnType, VectorExpressionDescriptor.Mode.PROJECTION); } if (isIntFamily(inputType)) { - return createVectorExpression(CastLongToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression( + CastLongToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, + returnType); } else if (isFloatFamily(inputType)) { - return createVectorExpression(CastDoubleToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression( + CastDoubleToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, + returnType); } else if (decimalTypePattern.matcher(inputType).matches()) { if (child instanceof ExprNodeColumnDesc) { int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child); - DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex); + DataTypePhysicalVariation dataTypePhysicalVariation = + getDataTypePhysicalVariation(colIndex); if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) { // Do Decimal64 conversion instead. 
- return createDecimal64ToDecimalConversion(colIndex, returnType); + return createDecimal64ToDecimalConversion(colIndex, returnType, this); } else { - return createVectorExpression(CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, + return createVectorExpression( + CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } } else { - return createVectorExpression(CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, + return createVectorExpression( + CastDecimalToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); } } else if (isStringFamily(inputType)) { - return createVectorExpression(CastStringToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression( + CastStringToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, + returnType); } else if (inputType.equals("timestamp")) { - return createVectorExpression(CastTimestampToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, returnType); + return createVectorExpression( + CastTimestampToDecimal.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, + returnType); } return null; } @@ -3548,7 +3583,7 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio return ts; } - private Constructor getConstructor(Class cl) throws HiveException { + private static Constructor getConstructor(Class cl) throws HiveException { try { Constructor [] ctors = cl.getDeclaredConstructors(); if (ctors.length == 1) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java new file mode 100644 index 0000000..c3bca7d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommon.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby; + +import java.util.Arrays; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * This class is common operator class of Native Vectorized GroupBy that has common + * initialization logic. + */ +public abstract class VectorGroupByCommon + extends Operator + implements VectorizationContextRegion, VectorizationOperator { + + private static final long serialVersionUID = 1L; + + protected VectorGroupByDesc vectorDesc; + + protected VectorGroupByInfo vectorGroupByInfo; + + protected VectorizationContext vContext; + + // Create a new outgoing vectorization context because column name map will change. + protected VectorizationContext vOutContext; + + protected VectorExpression[] groupByKeyExpressions; + + protected VectorAggregationDesc[] vectorAggregationDescs; + + protected AggregationVariation aggregationVariation; + protected SingleCountAggregation singleCountAggregation; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + + // For debug tracing: the name of the map or reduce task. + protected transient String taskName; + + // Debug display. 
+ protected transient long batchCounter; + + public VectorGroupByCommon() { + super(); + } + + public static int INT_PER_LONG_COUNT = Long.SIZE / Integer.SIZE; + + public VectorGroupByCommon(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx); + + GroupByDesc desc = (GroupByDesc) conf; + this.conf = desc; + this.vectorDesc = (VectorGroupByDesc) vectorDesc; + vectorGroupByInfo = this.vectorDesc.getVectorGroupByInfo(); + + this.vContext = vContext; + + vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames()); + + groupByKeyExpressions = this.vectorDesc.getKeyExpressions(); + + vectorAggregationDescs = this.vectorDesc.getVecAggrDescs(); + + aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + singleCountAggregation = vectorGroupByInfo.getSingleCountAggregation(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + // Determine the name of our map or reduce task for debug tracing. + BaseWork work = Utilities.getMapWork(hconf); + if (work == null) { + work = Utilities.getReduceWork(hconf); + } + if (work == null) { + taskName = "none"; + } else { + taskName = work.getName(); + } + + batchCounter = 0; + } + + /** + * Implements the getName function for the Node Interface. + * + * @return the name of the operator + */ + @Override + public String getName() { + return getOperatorName(); + } + + public static String getOperatorName() { + return "GBY"; + } + + @Override + public VectorizationContext getOutputVectorizationContext() { + return vOutContext; + } + + @Override + public VectorizationContext getInputVectorizationContext() { + return vContext; + } + + @Override + public VectorDesc getVectorDesc() { + return vectorDesc; + } + + @Override + public OperatorType getType() { + return OperatorType.GROUPBY; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java new file mode 100644 index 0000000..6f31922 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/VectorGroupByCommonOutput.java @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +/** + * This class is common operator class of Native Vectorized GroupBy for output generation. + * Taking the aggregations and filling up the output batch. + */ +public abstract class VectorGroupByCommonOutput + extends VectorGroupByCommon { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected transient VectorizedRowBatch outputBatch; + + private transient VectorizedRowBatchCtx vrbCtx; + + private transient TypeInfo[] outputTypeInfos; + private transient DataTypePhysicalVariation[] outputDataTypePhysicalVariations; + + private transient StandardStructObjectInspector standardOutputObjInspector; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByCommonOutput() { + super(); + } + + public VectorGroupByCommonOutput(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + List objectInspectors = new ArrayList(); + + List outputFieldNames = conf.getOutputColumnNames(); + + final int keyCount = (groupByKeyExpressions == null ? 0 : groupByKeyExpressions.length); + final int aggrCount = (vectorAggregationDescs == null ? 
0 : vectorAggregationDescs.length); + final int outputCount = keyCount + aggrCount; + outputTypeInfos = new TypeInfo[outputCount]; + outputDataTypePhysicalVariations = new DataTypePhysicalVariation[outputCount]; + int outputTypesIndex = 0; + + for(int i = 0; i < keyCount; ++i) { + VectorExpression keyExpression = groupByKeyExpressions[i]; + TypeInfo outputTypeInfo = keyExpression.getOutputTypeInfo(); + outputTypeInfos[outputTypesIndex] = outputTypeInfo; + DataTypePhysicalVariation outputDataTypePhysicalVariation = + keyExpression.getOutputDataTypePhysicalVariation(); + outputDataTypePhysicalVariations[outputTypesIndex++] = outputDataTypePhysicalVariation; + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + outputTypeInfo); + objectInspectors.add(objInsp); + } + + for(int i = 0; i < aggrCount; ++i) { + VectorAggregationDesc vecAggrDesc = vectorAggregationDescs[i]; + TypeInfo outputTypeInfo = vecAggrDesc.getOutputTypeInfo(); + outputTypeInfos[outputTypesIndex] = outputTypeInfo; + outputDataTypePhysicalVariations[outputTypesIndex++] = + vecAggrDesc.getOutputDataTypePhysicalVariation(); + ObjectInspector objInsp = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(outputTypeInfo); + objectInspectors.add(objInsp); + } + + standardOutputObjInspector = + ObjectInspectorFactory.getStandardStructObjectInspector(outputFieldNames, objectInspectors); + outputObjInspector = standardOutputObjInspector; + + /** + * Setup the output batch and vectorization context for downstream operators. + */ + vrbCtx = new VectorizedRowBatchCtx( + conf.getOutputColumnNames().toArray(new String[0]), + outputTypeInfos, + outputDataTypePhysicalVariations, + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + vOutContext.getScratchColumnTypeNames(), + vOutContext.getScratchDataTypePhysicalVariations()); + + outputBatch = vrbCtx.createVectorizedRowBatch(); + } + + public void forwardOutputBatch(VectorizedRowBatch outputBatch) throws HiveException { + + forward(outputBatch, null); + + outputBatch.reset(); + } + + /** + * Copy all of the keys and aggregations to the output batch. + */ + protected abstract void outputGroupBy() throws HiveException; + + protected void flushGroupBy() throws HiveException { + outputGroupBy(); + if (outputBatch.size > 0) { + forwardOutputBatch(outputBatch); + } + } + + /** + * On close, make sure a partially filled overflow batch gets forwarded. + */ + @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + if (!aborted) { + flushGroupBy(); + } + LOG.debug("VectorGroupByCommonOutputOperator closeOp " + batchCounter + " batches processed"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java new file mode 100644 index 0000000..d0dc5f5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashCommon.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.VectorGroupByCommonOutput;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This is the common hash operator class of the Native Vectorized GroupBy operators; it holds
+ * the hash-related initialization logic.
+ */
+public abstract class VectorGroupByHashCommon
+    extends VectorGroupByCommonOutput {
+
+  private static final long serialVersionUID = 1L;
+  private static final String CLASS_NAME = VectorGroupByHashCommon.class.getName();
+  private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  // Non-transient members initialized by the constructor. They cannot be final due to Kryo.
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  protected transient long hashGroupByMemoryAvailableByteLength;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashCommon() {
+    super();
+  }
+
+  public VectorGroupByHashCommon(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    final float memoryPercentage = conf.getGroupByMemoryUsage();
+    final int testMaxMemoryAvailable = vectorGroupByInfo.getTestGroupByMaxMemoryAvailable();
+    final long maxMemoryAvailable =
+        (testMaxMemoryAvailable == -1 ?
+            conf.getMaxMemoryAvailable() : testMaxMemoryAvailable);
+    hashGroupByMemoryAvailableByteLength = (long) (memoryPercentage * maxMemoryAvailable);
+  }
+
+  /*
+   * Return the exponent of the power of 2 that is equal to or next below a value
+   * (i.e. floor(log2(value))).
+   *
+   * Example:
+   *    100000b = 2^5 = 32
+   *    where Long.numberOfLeadingZeros returns (64 - 6) = 58
+   *    and the result = 5.
+   *
+   * Replacing any set of lower 0's with 1's doesn't change the result.
+   * Put differently, all numbers from 32 to 63 return 5.
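+   * Another example: floorPowerOf2(100) = 6, since 2^6 = 64 <= 100 < 2^7 = 128.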
+ * + */ + public static int floorPowerOf2(long a) { + if (a == 0) { + return 0; + } + final int floorLeadingZerosCount = Long.numberOfLeadingZeros(a); + final int result = Long.SIZE - floorLeadingZerosCount - 1; + return result; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java new file mode 100644 index 0000000..80331fe --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashOperatorBase.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hive.common.util.HashCodeUtil; + +/** + * This class is common hash operator class of Native Vectorized GroupBy with common operator + * logic for checking key limits and the common process method logic. + */ +public abstract class VectorGroupByHashOperatorBase + extends VectorGroupByHashTable { + + private static final long serialVersionUID = 1L; + + // Non-transient members initialized by the constructor. They cannot be final due to Kryo. + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+ // + + public VectorGroupByHashOperatorBase() { + super(); + } + + public VectorGroupByHashOperatorBase(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + determineInitialHashTableSize(); + + allocateHashTable(); + } + + protected void doBeforeMainLoopWork(final int inputLogicalSize) + throws HiveException, IOException { + + /* + * If the hash table has less than the worst-case inputLogicalSize keys that + * could be added, then flush the current hash table entries and clear it. + */ + checkKeyLimitOncePerBatch(inputLogicalSize); + } + + protected abstract void doMainLoop(VectorizedRowBatch batch, final int inputLogicalSize) + throws HiveException, IOException; + + /* + * Common process method that does common work then drives the specialized Operator classes with + * the doBeforeMainLoopWork and doMainLoop overrides. + */ + @Override + public void process(Object row, int tag) throws HiveException { + + try { + VectorizedRowBatch batch = (VectorizedRowBatch) row; + + batchCounter++; + + final int inputLogicalSize = batch.size; + + if (inputLogicalSize == 0) { + return; + } + + /* + * Perform any key expressions. Results will go into scratch columns. + */ + if (groupByKeyExpressions != null) { + for (VectorExpression ve : groupByKeyExpressions) { + ve.evaluate(batch); + } + } + + doBeforeMainLoopWork(inputLogicalSize); + + doMainLoop(batch, inputLogicalSize); + + } catch (Exception e) { + throw new HiveException(e); + } + } + + protected void outputSingleCountForNullSingleKey(ColumnVector keyColumnVector, + LongColumnVector countColumnVector, long nullKeyCount) + throws HiveException { + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int nullBatchIndex = outputBatch.size; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + + countColumnVector.isNull[nullBatchIndex] = false; + countColumnVector.vector[nullBatchIndex] = nullKeyCount; + + outputBatch.size++; + } + + protected void outputSingleCountForNullMultiKey(long nullKeyCount) + throws HiveException { + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + final int keySize = groupByKeyExpressions.length; + final int nullBatchIndex = outputBatch.size; + for (int i = 0; i < keySize; i++) { + ColumnVector keyColumnVector = outputBatch.cols[i]; + keyColumnVector.isNull[nullBatchIndex] = true; + keyColumnVector.noNulls = false; + } + + LongColumnVector countKeyColumnVector = (LongColumnVector) outputBatch.cols[keySize]; + countKeyColumnVector.isNull[nullBatchIndex] = false; + countKeyColumnVector.vector[nullBatchIndex] = nullKeyCount; + + outputBatch.size++; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java new file mode 100644 index 0000000..44f646c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/VectorGroupByHashTable.java @@ -0,0 +1,350 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.HashTableKeyType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is common hash table class of Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashTable + extends VectorGroupByHashCommon { + + private static final long serialVersionUID = 1L; + + private static final String CLASS_NAME = VectorGroupByHashTable.class.getName(); + private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + private boolean isBytesHashTable; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + // How many times we encountered a limit on the hash table and had to flush and recreate. + private long flushAndRecreateCount; + + // Memory available in bytes for the slot table, and when we have bytes keys, the memory available + // for the key store. + protected transient long hashTableMemoryAvailableByteLength; + protected transient long keyStoreMemoryAvailableByteLength; + + // The logical size and power of 2 mask of the hash table + protected transient int logicalHashBucketCount; + protected transient int logicalHashBucketMask; + + // The number of longs in the hash table slot array. It is the logical size * entries per slot. + protected int slotPhysicalArraySize; + + // The maximum number of keys we'll keep in the hash table before flushing. + protected transient int hashTableKeyCountLimit; + + // The slot table with 1, 2, 3, etc longs per entry. + protected transient long[] slotMultiples; + + // The key count and largest number of misses in our quadratic probing style hash table. + // Maintained by the hash table variations. + protected transient int keyCount; + protected transient int largestNumberOfSteps; + + // Byte length for WriteBuffers segments in the VectorKeyStore used for bytes keys + protected transient int keyStoreByteSize; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
+  //
+
+  public VectorGroupByHashTable() {
+    super();
+  }
+
+  public VectorGroupByHashTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    isBytesHashTable =
+        (this.vectorDesc.getVectorGroupByInfo().getHashTableKeyType() != HashTableKeyType.LONG);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    flushAndRecreateCount = 0;
+
+    divvyUpHashGroupByMemory();
+  }
+
+  public long getFlushAndStartOverCount() {
+    return flushAndRecreateCount;
+  }
+
+  public abstract int getHashTableMultiple();
+
+  /*
+   * Decide how to apportion memory between the slot table and, when we have bytes keys,
+   * the key store. (Single long keys are stored in the slot table.)
+   */
+  private void divvyUpHashGroupByMemory() {
+
+    /*
+     * CONCERN:
+     * Do we really want a hash table to use the maximum supplied memory immediately?
+     * That could waste memory that other operators could use. And, it can cause Java GC
+     * issues because of how large the single slot table array is. Large hash tables
+     * with small key sets could cause lots of unnecessary cold RAM hits. There is a tension
+     * here, of course. Too small a table and there will be more insert collisions.
+     *
+     * In contrast, the current VectorGroupByOperator and GroupByOperator classes use a
+     * Java HashMap which automatically grows over time.
+     *
+     * The issues here are similar to MapJoin, except we have the possibility of using a smaller
+     * hash table and flushing everything to Reduce. Then, creating a larger slot table instead
+     * of zeroing the current one. MapJoin cannot flush -- it either needs to expand its
+     * hash tables to hold everything or spill some of the data to secondary storage (Hybrid Grace).
+     */
+
+    if (isBytesHashTable) {
+
+      // UNDONE: Use key size estimates to make a better decision than half...
+      final long half = hashGroupByMemoryAvailableByteLength / 2;
+      hashTableMemoryAvailableByteLength = half;
+      keyStoreMemoryAvailableByteLength = half;
+    } else {
+      hashTableMemoryAvailableByteLength = hashGroupByMemoryAvailableByteLength;
+      keyStoreMemoryAvailableByteLength = 0;
+    }
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  private static final int LARGEST_NUMBER_OF_STEPS_THRESHOLD = 6;
+
+  public boolean isAboveLargestNumberOfStepsThresold() {
+    return (largestNumberOfSteps > LARGEST_NUMBER_OF_STEPS_THRESHOLD);
+  }
+
+  /*
+   * Do override this method in specialized hash tables that have more to initialize and/or create.
+   */
+  public void allocateHashTable() throws HiveException {
+    allocateBucketArray();
+  }
+
+  /*
+   * Allocate the key store when we have bytes keys.
+   */
+  public VectorKeyStore allocateVectorKeyStore(VectorKeyStore keyStore) {
+    if (keyStore == null) {
+      return new VectorKeyStore(keyStoreByteSize);
+    } else {
+      keyStore.clear();
+      return keyStore;
+    }
+  }
+
+  /*
+   * When flushing and recreating, release the memory when the slot table is changing size, etc.
+   */
+  public void releaseHashTableMemory() throws HiveException {
+    if (slotMultiples.length == slotPhysicalArraySize) {
+
+      // Keep it and clear it later.
+      return;
+    }
+    slotMultiples = null;
+  }
+
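A standalone sketch of the probe walk whose miss count feeds largestNumberOfSteps above (the 8-slot table and the starting hash value are made up): the step grows by one per miss, so the probe offsets follow the triangular numbers 1, 3, 6, 10, ..., and with a power-of-two table the walk visits every slot once before repeating.

public class ProbeWalkDemo {
  public static void main(String[] args) {
    final int logicalHashBucketMask = 8 - 1; // tiny 8-slot table for illustration
    int slot = 5 & logicalHashBucketMask;    // pretend the key hashed to 5
    long probeSlot = slot;
    System.out.print(slot);
    for (int i = 1; i < 8; i++) {
      probeSlot += i;                        // same step rule as the findOrCreate methods below
      slot = (int) (probeSlot & logicalHashBucketMask);
      System.out.print(" " + slot);
    }
    // prints: 5 6 0 3 7 4 2 1, every slot visited exactly once
  }
}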
+  // Java arrays are indexed by int and cannot quite hold 2^31 entries, so the largest power
+  // of 2 usable for indexing is two less than the number of Integer bits:
+  // 2^30 = 1,073,741,824.
+  private static final int MAX_POWER_OF_2_FOR_INT_INDEXING = Integer.SIZE - 2;
+
+  // An arbitrary factor to divide the slot table size by to get the key count limit.
+  // Hitting the key count limit will cause the hash table to be flushed to Reduce and cleared
+  // for refilling.
+  private static final int KEY_COUNT_FACTOR = 8;
+
+  // Make sure we have comfortable room for at least one batch of new keys to support the
+  // VectorGroupByHashOperatorBase.checkKeyLimitOncePerBatch method.
+  private static final int MIN_HASH_TABLE_BYTE_LENGTH =
+      VectorizedRowBatch.DEFAULT_SIZE * KEY_COUNT_FACTOR * (Long.SIZE / Byte.SIZE);
+  private static final int MIN_POWER_OF_2 = floorPowerOf2(MIN_HASH_TABLE_BYTE_LENGTH);
+
+  /*
+   * Determine the size of the slot table and, for bytes keys, of the key store.
+   */
+  public void determineInitialHashTableSize() throws HiveException {
+
+    /*
+     * Slot table size.
+     */
+
+    final int multiple = getHashTableMultiple();
+
+    // Take our multiple into account.
+    final int floorPowerOf2MaxHashTableMemoryByteLength =
+        floorPowerOf2(hashTableMemoryAvailableByteLength / multiple);
+
+    // No matter how much memory they want to give us, our array is limited to int indexing.
+    int maxPowerOf2HashTableMemoryByteLength =
+        Math.min(floorPowerOf2MaxHashTableMemoryByteLength, MAX_POWER_OF_2_FOR_INT_INDEXING);
+
+    // UNDONE: Artificially limit for now... 2^24 = 16,777,216 bytes.
+    maxPowerOf2HashTableMemoryByteLength = Math.min(maxPowerOf2HashTableMemoryByteLength, 24);
+
+    final int powerOf2HashTableMemoryByteLength =
+        Math.max(maxPowerOf2HashTableMemoryByteLength, MIN_POWER_OF_2);
+
+    final int hashTableByteSize = (1 << powerOf2HashTableMemoryByteLength);
+    final int hashTableLongSize = hashTableByteSize / (Long.SIZE / Byte.SIZE);
+
+    logicalHashBucketCount = hashTableLongSize;
+
+    slotPhysicalArraySize = logicalHashBucketCount * multiple;
+
+    /*
+     * Key store size.
+     */
+
+    if (isBytesHashTable) {
+      final int floorPowerOf2MaxKeyStoreMemoryByteLength =
+          floorPowerOf2(keyStoreMemoryAvailableByteLength);
+
+      // No matter how much memory they want to give us, our array is limited to int indexing.
+      int maxPowerOf2KeyStoreMemoryByteLength =
+          Math.min(floorPowerOf2MaxKeyStoreMemoryByteLength, MAX_POWER_OF_2_FOR_INT_INDEXING);
+
+      keyStoreByteSize = (1 << maxPowerOf2KeyStoreMemoryByteLength);
+
+      // CONSIDER: Better min/max limits.
+      keyStoreByteSize = Math.min(keyStoreByteSize, 1024 * 1024);
+      keyStoreByteSize = Math.max(keyStoreByteSize, 128 * 1024);
+    }
+
+    if (!isBytesHashTable) {
+      LOG.info(
+          "Logical slot table size " + logicalHashBucketCount +
+          " multiple " + multiple);
+    } else {
+      LOG.info(
+          "Logical slot table size " + logicalHashBucketCount +
+          " multiple " + multiple +
+          " key store size " + keyStoreByteSize);
+    }
+  }
+
+  /*
+   * Decide the sizes to use for the next hash table when flushing and recreating.
+   */
+  public void determineNextHashTableSize() throws HiveException {
+    // CONSIDER: Growing the hash table size upon examining the current hash table.
+  }
+
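To make the sizing arithmetic above concrete, a small self-contained walk-through (the 100 MB budget and the multiple of 2 are hypothetical; the clamps mirror the constants above):

public class SizingDemo {
  public static void main(String[] args) {
    long hashTableMemoryAvailable = 100L * 1024 * 1024; // assumed 100 MB budget
    int multiple = 2;                                   // e.g. key + hash code per slot
    // floorPowerOf2 of the per-entry budget: 63 - numberOfLeadingZeros.
    int pow2 = 63 - Long.numberOfLeadingZeros(hashTableMemoryAvailable / multiple); // 25
    pow2 = Math.min(pow2, 30);  // int-indexing cap (Integer.SIZE - 2)
    pow2 = Math.min(pow2, 24);  // the temporary 2^24 clamp above
    pow2 = Math.max(pow2, 16);  // MIN_POWER_OF_2, since 1024 * 8 * 8 = 65,536 = 2^16
    int byteSize = 1 << pow2;   // 16,777,216 bytes
    int longSize = byteSize / 8;             // 2,097,152 logical buckets
    System.out.println(longSize * multiple); // 4,194,304 longs in the slot array
  }
}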
+  /*
+   * For now, we are just allocating the slot table array.
+   * FUTURE: We'll need to revisit these calculations when we support STRING keys.
+   */
+  protected void allocateBucketArray() {
+    if (slotMultiples != null) {
+
+      // The releaseHashTableMemory method kept the same-size array, so just clear it.
+      Arrays.fill(slotMultiples, 0);
+    } else {
+
+      logicalHashBucketMask = logicalHashBucketCount - 1;
+
+      hashTableKeyCountLimit = logicalHashBucketCount / KEY_COUNT_FACTOR;
+
+      slotMultiples = new long[slotPhysicalArraySize];
+    }
+
+    keyCount = 0;
+    largestNumberOfSteps = 0;
+
+    if (flushAndRecreateCount != 0) {
+      LOG.info("Flush and recreate #" + flushAndRecreateCount);
+    }
+  }
+
+  /*
+   * Check the worst case possibility -- adding a new key for each row in the batch -- and flush
+   * and recreate the hash table.
+   */
+  protected void checkKeyLimitOncePerBatch(final int inputLogicalSize)
+      throws HiveException, IOException {
+
+    /*
+     * Check the hash table key limit against the worst case of adding all keys, outside the
+     * inner loop for better performance.
+     */
+    final boolean isReachedKeyLimit =
+        (keyCount + inputLogicalSize > hashTableKeyCountLimit);
+    if (isReachedKeyLimit || isAboveLargestNumberOfStepsThresold()) {
+      LOG.info(
+          "Reached key limit " + isReachedKeyLimit +
+          ", above largest number of steps threshold " + isAboveLargestNumberOfStepsThresold());
+
+      flushAndRecreateCount++;
+      flushAndRecreate();
+      if (keyCount + inputLogicalSize > hashTableKeyCountLimit) {
+
+        // Hash table is way too small.
+        raise2ndHitOutOfStorage();
+      }
+    }
+  }
+
+  protected void raise2ndHitOutOfStorage() throws HiveException {
+    throw new HiveException(
+        "After flushing and clearing the hash table, there still isn't enough storage");
+  }
+
+  protected void flushAndRecreate() throws HiveException, IOException {
+
+    /*
+     * 1) Flush hash table.
+     * 2) Use current state to determine next sizes.
+     * 3) Release memory, if necessary.
+     * 4) Recreate/clear using next sizes.
+     */
+
+    flushGroupBy();
+
+    // Based on current hash table sizes and perhaps historical information, determine
+    // the size to use next during recreation.
+    determineNextHashTableSize();
+
+    releaseHashTableMemory();
+
+    allocateHashTable();
+  }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/duplicatereduction/VectorGroupByHashBytesKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/duplicatereduction/VectorGroupByHashBytesKeyDuplicateReductionTable.java
new file mode 100644
index 0000000..63a6e66
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/duplicatereduction/VectorGroupByHashBytesKeyDuplicateReductionTable.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/* + * A single bytes key hash table optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashBytesKeyDuplicateReductionTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + private transient VectorKeyStore keyStore; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashBytesKeyDuplicateReductionTable() { + super(); + } + + public VectorGroupByHashBytesKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_DUPLICATE_REDUCTION_ENTRY_SIZE; + } + + protected static final int BYTES_DUPLICATE_REDUCTION_ENTRY_SIZE = 2; + + public void findOrCreateBytesDuplicateReductionKey(byte[] keyBytes, int keyStart, int keyLength, + long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int pairIndex; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex] == 0) { + break; + } + if (hashCode == slotMultiples[pairIndex + 1] && + keyStore.unsafeEqualKey(slotMultiples[pairIndex], keyBytes, keyStart, keyLength)) { + // Found it! A duplicate has now been eliminated. + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + + // First entry. 
+ slotMultiples[pairIndex] = keyStore.add(keyBytes, keyStart, keyLength); + slotMultiples[pairIndex + 1] = hashCode; + + keyCount++; + + } + + private int countKeyPairIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + + protected int initBytesKeyIterator() { + countKeyPairIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + return keyCount; + } + + // Read next key. + protected void readNext() { + while (true) { + final long keyRef = slotMultiples[countKeyPairIndex]; + if (keyRef != 0) { + keyStore.getKey( + keyRef, + keyByteSegmentRef, + keyReadPos); + + countKeyPairIndex += 2; + return; + } + countKeyPairIndex += 2; + } + } + + public byte[] getKeyBytes() { + return keyByteSegmentRef.getBytes(); + } + + public int getKeyBytesOffset() { + return (int) keyByteSegmentRef.getOffset(); + } + + public int getKeyBytesLength() { + return keyByteSegmentRef.getLength(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/singlecount/VectorGroupByHashBytesKeySingleCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/singlecount/VectorGroupByHashBytesKeySingleCountTable.java new file mode 100644 index 0000000..5a94e1a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/byteskey/singlecount/VectorGroupByHashBytesKeySingleCountTable.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.singlecount; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; + +/** + * A single bytes key hash table optimized for a single count Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashBytesKeySingleCountTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + // The above members are initialized by the constructor and must not be + // transient. 
+ //--------------------------------------------------------------------------- + + private transient VectorKeyStore keyStore; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashBytesKeySingleCountTable() { + super(); + } + + public VectorGroupByHashBytesKeySingleCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + keyStore = allocateVectorKeyStore(keyStore); + } + + @Override + public void releaseHashTableMemory() throws HiveException { + super.releaseHashTableMemory(); + + keyStore = null; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return BYTES_ENTRY_SIZE; + } + + protected static final int BYTES_ENTRY_SIZE = 3; + + public void findOrCreateBytesKey(byte[] keyBytes, int keyStart, int keyLength, + long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + int tripleIndex; + boolean isNewKey; + while (true) { + tripleIndex = 3 * slot; + if (slotMultiples[tripleIndex] == 0) { + isNewKey = true; + break; + } + if (hashCode == slotMultiples[tripleIndex + 1] && + keyStore.unsafeEqualKey(slotMultiples[tripleIndex], keyBytes, keyStart, keyLength)) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int) (probeSlot & logicalHashBucketMask); + } + + if (isNewKey) { + + // First entry. + slotMultiples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); + slotMultiples[tripleIndex + 1] = hashCode; + slotMultiples[tripleIndex + 2] = count; + + keyCount++; + + } else if (count > 0) { + + slotMultiples[tripleIndex + 2] += count; + } + } + + private int countKeyTripleIndex; + private WriteBuffers.Position keyReadPos; + private ByteSegmentRef keyByteSegmentRef; + private long currentCountKeyCount; + + protected int initBytesKeyIterator() { + countKeyTripleIndex = 0; + keyReadPos = new WriteBuffers.Position(); + keyByteSegmentRef = new ByteSegmentRef(); + currentCountKeyCount = 0; + return keyCount; + } + + // Read next key. 
+ protected void readNext() { + while (true) { + final long keyRef = slotMultiples[countKeyTripleIndex]; + if (keyRef != 0) { + keyStore.getKey( + keyRef, + keyByteSegmentRef, + keyReadPos); + currentCountKeyCount = slotMultiples[countKeyTripleIndex + 2]; + + countKeyTripleIndex += 3; + return; + } + countKeyTripleIndex += 3; + } + } + + public byte[] getKeyBytes() { + return keyByteSegmentRef.getBytes(); + } + + public int getKeyBytesOffset() { + return (int) keyByteSegmentRef.getOffset(); + } + + public int getKeyBytesLength() { + return keyByteSegmentRef.getLength(); + } + + public long getCount() { + return currentCountKeyCount; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/duplicatereduction/VectorGroupByHashLongKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/duplicatereduction/VectorGroupByHashLongKeyDuplicateReductionTable.java new file mode 100644 index 0000000..48b0b01 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/duplicatereduction/VectorGroupByHashLongKeyDuplicateReductionTable.java @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.duplicatereduction; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * A single long key hash table optimized for duplicate reduction Native Vectorized GroupBy. + */ +public abstract class VectorGroupByHashLongKeyDuplicateReductionTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + protected boolean haveZeroKey; + + //--------------------------------------------------------------------------- + // Pass-thru constructors. 
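A standalone sketch of the batch-draining pattern that doOutputLongKeys below uses to copy keys into fixed-size output batches (DEFAULT_SIZE shrunk to 4 here for illustration; the real constant is VectorizedRowBatch.DEFAULT_SIZE):

public class DrainDemo {
  static final int DEFAULT_SIZE = 4; // stand-in for VectorizedRowBatch.DEFAULT_SIZE
  public static void main(String[] args) {
    int keyCount = 10;   // pretend 10 keys remain in the hash table
    int batchSize = 0;
    while (keyCount > 0) {
      if (batchSize == DEFAULT_SIZE) {
        System.out.println("forward full batch");
        batchSize = 0;
      }
      // Copy at most the remaining room in the current batch.
      int count = Math.min(keyCount, DEFAULT_SIZE - batchSize);
      batchSize += count;  // in the operator, a for-loop copies keys here
      keyCount -= count;
    }
    System.out.println("final partial batch of " + batchSize); // flushed on close
  }
}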
+ // + + public VectorGroupByHashLongKeyDuplicateReductionTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashLongKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + + haveZeroKey = false; + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return LONG_DUPLICATE_REDUCTION_ENTRY_SIZE; + } + + protected static int LONG_DUPLICATE_REDUCTION_ENTRY_SIZE = 1; + + public void findOrCreateLongDuplicateReductionKey(long key, long hashCode) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + while (true) { + if (slotMultiples[slot] == 0) { + break; + } + if (key == slotMultiples[slot]) { + // Found it! A duplicate has now been eliminated. + return; + } + // Some other key (collision) - keep probing. + probeSlot += (++i); + if (largestNumberOfSteps < i) { + largestNumberOfSteps = i; + } + slot = (int)(probeSlot & logicalHashBucketMask); + } + + // Create first-time key. + slotMultiples[slot] = key; + keyCount++; + } + + private int countKeyIndex; + + protected int initLongDuplicateReductionKeyIterator() { + countKeyIndex = 0; + return keyCount; + } + + // Find next key and return it. + protected long getNext() { + while (true) { + long key = slotMultiples[countKeyIndex++]; + if (key != 0) { + return key; + } + } + } + + protected void doOutputLongKeys( + LongColumnVector keyColumnVector) throws HiveException { + + long[] keyVector = keyColumnVector.vector; + + if (haveZeroKey) { + + // Zero key to deal with. + + // Is the outputBatch already full? + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + keyVector[outputBatch.size++] = 0; + } + + // Use the iterator to race down the slot table array and pull long key and count out of each + // slot entry and store in the output batch. + int keyCount = initLongDuplicateReductionKeyIterator(); + while (keyCount > 0) { + if (outputBatch.size == outputBatch.DEFAULT_SIZE) { + forwardOutputBatch(outputBatch); + } + + int startBatchIndex = outputBatch.size; + int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex); + + for (int i = startBatchIndex; i < startBatchIndex + count; i++) { + keyVector[i] = getNext(); + } + outputBatch.size += count; + keyCount -= count; + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/singlecount/VectorGroupByHashLongKeySingleCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/singlecount/VectorGroupByHashLongKeySingleCountTable.java new file mode 100644 index 0000000..52564fb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/longkey/singlecount/VectorGroupByHashLongKeySingleCountTable.java @@ -0,0 +1,302 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.longkey.singlecount; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.VectorGroupByHashOperatorBase; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/** + * Single long key hash table optimized for: + * 1) COUNT(*) Native Vectorized GroupBy. + * 2) COUNT(key-column) and COUNT(non-key-column) + * Native Vectorized GroupBy + */ +public abstract class VectorGroupByHashLongKeySingleCountTable + extends VectorGroupByHashOperatorBase { + + private static final long serialVersionUID = 1L; + + protected int keyColumnNum; + + // The above members are initialized by the constructor and must not be + // transient. + //--------------------------------------------------------------------------- + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + public VectorGroupByHashLongKeySingleCountTable() { + super(); + + keyColumnNum = -1; + } + + public VectorGroupByHashLongKeySingleCountTable(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + + keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum(); + } + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + } + + @Override + public void allocateHashTable() throws HiveException { + super.allocateHashTable(); + } + + //------------------------------------------------------------------------------------------------ + + public int getHashTableMultiple() { + return LONG_NON_ZERO_COUNT_ENTRY_SIZE; + } + + protected static final int LONG_NON_ZERO_COUNT_ENTRY_SIZE = 2; + + public void findOrCreateLongNonZeroCountKey(long key, long hashCode, int count) + throws HiveException, IOException { + + int intHashCode = (int) hashCode; + int slot = (intHashCode & logicalHashBucketMask); + long probeSlot = slot; + int i = 0; + boolean isNewKey; + int pairIndex = 0; + while (true) { + pairIndex = 2 * slot; + if (slotMultiples[pairIndex + 1] == 0) { + isNewKey = true; + break; + } + if (key == slotMultiples[pairIndex]) { + isNewKey = false; + break; + } + // Some other key (collision) - keep probing. 
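+      // (The probe step grows by one per miss, so the visited offsets follow the
+      // triangular numbers 1, 3, 6, 10, ...; with a power-of-two table the walk
+      // can reach every slot before repeating.)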
+      probeSlot += (++i);
+      if (largestNumberOfSteps < i) {
+        largestNumberOfSteps = i;
+      }
+      slot = (int) (probeSlot & logicalHashBucketMask);
+    }
+
+    if (isNewKey) {
+      slotMultiples[pairIndex] = key;
+      keyCount++;
+      slotMultiples[pairIndex + 1] = count;
+    } else {
+      slotMultiples[pairIndex + 1] += count;
+    }
+  }
+
+  private int nonZeroCountPairIndex;
+  private long currentNonZeroCount;
+
+  protected int initLongNonZeroCountKeyIterator() {
+    nonZeroCountPairIndex = 0;
+    currentNonZeroCount = 0;
+    return keyCount;
+  }
+
+  // Find the next key and return it.
+  protected long getNextNonZeroCountKey() {
+    while (true) {
+      long count = slotMultiples[nonZeroCountPairIndex + 1];
+      if (count > 0) {
+        currentNonZeroCount = count;
+        long key = slotMultiples[nonZeroCountPairIndex];
+        nonZeroCountPairIndex += 2;
+        return key;
+      }
+      nonZeroCountPairIndex += 2;
+    }
+  }
+
+  public long getLongNonZeroCount() {
+    return currentNonZeroCount;
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the key and count pairs of the single long key non-zero count hash table to the
+   * output.
+   */
+  protected void outputLongNonZeroKeyAndCountPairs(
+      LongColumnVector keyColumnVector,
+      LongColumnVector countColumnVector) throws HiveException {
+
+    boolean[] keyIsNull = keyColumnVector.isNull;
+    long[] keyVector = keyColumnVector.vector;
+    boolean[] countIsNull = countColumnVector.isNull;
+    long[] countVector = countColumnVector.vector;
+
+    // Use the iterator to race down the slot table array, pull the long key and the count out of
+    // each slot entry, and store them in the output batch.
+    int keyCount = initLongNonZeroCountKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int i = startBatchIndex; i < startBatchIndex + count; i++) {
+        keyVector[i] = getNextNonZeroCountKey();
+        countVector[i] = getLongNonZeroCount();
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  private static final long LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK = 1L << 63;
+
+  protected static final int LONG_ZERO_COUNT_ENTRY_SIZE = 2;
+
+  public void findOrCreateLongZeroCountKey(long key, long hashCode, int count)
+      throws HiveException, IOException {
+
+    int intHashCode = (int) hashCode;
+    int slot = (intHashCode & logicalHashBucketMask);
+    long probeSlot = slot;
+    int i = 0;
+    boolean isNewKey;
+    int pairIndex = 0;
+    while (true) {
+      pairIndex = 2 * slot;
+      if (slotMultiples[pairIndex + 1] == 0) {
+        isNewKey = true;
+        break;
+      }
+      if (key == slotMultiples[pairIndex]) {
+        isNewKey = false;
+        break;
+      }
+      // Some other key (collision) - keep probing.
+      probeSlot += (++i);
+      if (largestNumberOfSteps < i) {
+        largestNumberOfSteps = i;
+      }
+      slot = (int) (probeSlot & logicalHashBucketMask);
+    }
+
+    if (isNewKey) {
+      slotMultiples[pairIndex] = key;
+      keyCount++;
+      if (count == 0) {
+        slotMultiples[pairIndex + 1] = LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK;
+      } else {
+        slotMultiples[pairIndex + 1] = count;
+      }
+    } else if (count > 0) {
+
+      // Only update the count when we are leaving 0.
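+      // A stored value of 0 marks an empty slot, so an occupied key whose logical count
+      // is still 0 is represented by the sentinel (bit 63 set). The first non-zero count
+      // overwrites the sentinel; subsequent counts accumulate normally.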
+      if (slotMultiples[pairIndex + 1] == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) {
+        slotMultiples[pairIndex + 1] = count;
+      } else {
+        slotMultiples[pairIndex + 1] += count;
+      }
+    }
+  }
+
+  private int countKeyPairIndex;
+  private long currentCountKeyCount;
+
+  protected int initLongZeroCountKeyIterator() {
+    countKeyPairIndex = 0;
+    currentCountKeyCount = 0;
+    return keyCount;
+  }
+
+  // Find the next key and return it.
+  protected long getNextZeroCountKey() {
+    while (true) {
+      long count = slotMultiples[countKeyPairIndex + 1];
+      if (count != 0) {
+        if (count == LONG_KEY_COUNT_KEY_ZERO_HAS_VALUE_MASK) {
+          currentCountKeyCount = 0;
+        } else {
+          currentCountKeyCount = count;
+        }
+        long key = slotMultiples[countKeyPairIndex];
+        countKeyPairIndex += 2;
+        return key;
+      }
+      countKeyPairIndex += 2;
+    }
+  }
+
+  public long getCount() {
+    return currentCountKeyCount;
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the key and count pairs of the single long key zero count hash table to the
+   * output.
+   */
+  protected void outputLongZeroCountKeyAndCountPairs(
+      LongColumnVector keyColumnVector,
+      LongColumnVector countColumnVector) throws HiveException {
+
+    boolean[] keyIsNull = keyColumnVector.isNull;
+    long[] keyVector = keyColumnVector.vector;
+    boolean[] countIsNull = countColumnVector.isNull;
+    long[] countVector = countColumnVector.vector;
+
+    // Use the iterator to race down the slot table array, pull the long key and the count out of
+    // each slot entry, and store them in the output batch.
+    int keyCount = initLongZeroCountKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) {
+        keyIsNull[batchIndex] = false;
+        keyVector[batchIndex] = getNextZeroCountKey();
+        countIsNull[batchIndex] = false;
+        countVector[batchIndex] = getCount();
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/duplicatereduction/VectorGroupByHashMultiKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/duplicatereduction/VectorGroupByHashMultiKeyDuplicateReductionTable.java
new file mode 100644
index 0000000..4729bf0
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/duplicatereduction/VectorGroupByHashMultiKeyDuplicateReductionTable.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.duplicatereduction;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction.VectorGroupByHashBytesKeyDuplicateReductionTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * A multi-key hash table optimized for duplicate reduction Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashMultiKeyDuplicateReductionTable
+    extends VectorGroupByHashBytesKeyDuplicateReductionTable {
+
+  private static final long serialVersionUID = 1L;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorDeserializeRow<BinarySortableDeserializeRead> keyVectorDeserializeRow;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashMultiKeyDuplicateReductionTable() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    final int size = groupByKeyExpressions.length;
+    TypeInfo[] typeInfos = new TypeInfo[size];
+    for (int i = 0; i < size; i++) {
+      VectorExpression keyExpr = groupByKeyExpressions[i];
+      typeInfos[i] = keyExpr.getOutputTypeInfo();
+    }
+    keyVectorDeserializeRow =
+        new VectorDeserializeRow<BinarySortableDeserializeRead>(
+            new BinarySortableDeserializeRead(
+                typeInfos,
+                /* useExternalBuffer */ true));
+    // The multi-key columns start at output column 0.
+    keyVectorDeserializeRow.init(0);
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the keys of the multi-key duplicate reduction hash table to the output.
+   */
+  protected void doOutputMultiKeys() throws HiveException {
+
+    // Use the iterator to race down the slot table array, pull the serialized key out of
+    // each slot entry, and store it in the output batch.
+    int keyCount = initBytesKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) {
+        readNext();
+        keyVectorDeserializeRow.setBytes(
+            getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength());
+
+        try {
+          // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc.
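+          // deserializeByRef points the output batch's byte columns at the hash table's
+          // backing buffers rather than copying the bytes; this is safe because the table
+          // is not modified again before the output batch is forwarded.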
+          keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex);
+        } catch (Exception e) {
+          throw new HiveException(
+              "\nDeserializeRead detail: " +
+                  keyVectorDeserializeRow.getDetailedReadPositionString(),
+              e);
+        }
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/singlecount/VectorGroupByHashMultiKeySingleCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/singlecount/VectorGroupByHashMultiKeySingleCountTable.java
new file mode 100644
index 0000000..2447ff8
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/multikey/singlecount/VectorGroupByHashMultiKeySingleCountTable.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.multikey.singlecount;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.singlecount.VectorGroupByHashBytesKeySingleCountTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * A multi-key serialized hash table optimized for single count Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashMultiKeySingleCountTable
+    extends VectorGroupByHashBytesKeySingleCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorDeserializeRow<BinarySortableDeserializeRead> keyVectorDeserializeRow;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashMultiKeySingleCountTable() {
+    super();
+  }
+
+  public VectorGroupByHashMultiKeySingleCountTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    final int size = groupByKeyExpressions.length;
+    TypeInfo[] typeInfos = new TypeInfo[size];
+    for (int i = 0; i < size; i++) {
+      VectorExpression keyExpr = groupByKeyExpressions[i];
+      typeInfos[i] = keyExpr.getOutputTypeInfo();
+    }
+    keyVectorDeserializeRow =
+        new VectorDeserializeRow<BinarySortableDeserializeRead>(
+            new BinarySortableDeserializeRead(
+                typeInfos,
+                /* useExternalBuffer */ true));
+    // The multi-key columns start at output column 0.
+    keyVectorDeserializeRow.init(0);
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the key and count pairs of the multi-key single count hash table to the output.
+   */
+  protected void doOutputMultiKeyAndCounts() throws HiveException {
+
+    final int keySize = groupByKeyExpressions.length;
+    LongColumnVector countColumnVector = (LongColumnVector) outputBatch.cols[keySize];
+    boolean[] countIsNull = countColumnVector.isNull;
+    long[] countVector = countColumnVector.vector;
+
+    // Use the iterator to race down the slot table array, pull the serialized key and the count
+    // out of each slot entry, and store them in the output batch.
+    int keyCount = initBytesKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) {
+        readNext();
+        keyVectorDeserializeRow.setBytes(
+            getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength());
+
+        try {
+          // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc.
+          keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex);
+        } catch (Exception e) {
+          throw new HiveException(
+              "\nDeserializeRead detail: " +
+                  keyVectorDeserializeRow.getDetailedReadPositionString(),
+              e);
+        }
+        countIsNull[batchIndex] = false;
+        countVector[batchIndex] = getCount();
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/duplicatereduction/VectorGroupByHashSerializeKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/duplicatereduction/VectorGroupByHashSerializeKeyDuplicateReductionTable.java
new file mode 100644
index 0000000..d6cc41d
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/duplicatereduction/VectorGroupByHashSerializeKeyDuplicateReductionTable.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.serializekey.duplicatereduction;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction.VectorGroupByHashBytesKeyDuplicateReductionTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * A single serialized key hash table optimized for duplicate reduction Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashSerializeKeyDuplicateReductionTable
+    extends VectorGroupByHashBytesKeyDuplicateReductionTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int keyColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorDeserializeRow<BinarySortableDeserializeRead> keyVectorDeserializeRow;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashSerializeKeyDuplicateReductionTable() {
+    super();
+
+    keyColumnNum = -1;
+  }
+
+  public VectorGroupByHashSerializeKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() };
+    keyVectorDeserializeRow =
+        new VectorDeserializeRow<BinarySortableDeserializeRead>(
+            new BinarySortableDeserializeRead(
+                typeInfos,
+                /* useExternalBuffer */ true));
+    // Single key is output column 0.
+    keyVectorDeserializeRow.init(new int[] { 0 });
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the keys of the serialized key duplicate reduction hash table to the output.
+   */
+  protected void doOutputSerializeKeys(
+      ColumnVector keyColumnVector) throws HiveException {
+
+    boolean[] keyIsNull = keyColumnVector.isNull;
+
+    // Use the iterator to race down the slot table array, pull the serialized key out of
+    // each slot entry, and store it in the output batch.
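+    // initBytesKeyIterator returns the number of live keys. Each pass below fills
+    // whatever room is left in the current output batch (up to DEFAULT_SIZE rows)
+    // and forwards the batch once it is full.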
+    int keyCount = initBytesKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) {
+        readNext();
+        keyIsNull[batchIndex] = false;
+        keyVectorDeserializeRow.setBytes(
+            getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength());
+
+        try {
+          // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc.
+          keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex);
+        } catch (Exception e) {
+          throw new HiveException(
+              "\nDeserializeRead detail: " +
+                  keyVectorDeserializeRow.getDetailedReadPositionString(),
+              e);
+        }
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/singlecount/VectorGroupByHashSerializeKeySingleCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/singlecount/VectorGroupByHashSerializeKeySingleCountTable.java
new file mode 100644
index 0000000..0d819c2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/serializekey/singlecount/VectorGroupByHashSerializeKeySingleCountTable.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.serializekey.singlecount;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.singlecount.VectorGroupByHashBytesKeySingleCountTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * A single serialized key hash table optimized for single count Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashSerializeKeySingleCountTable
+    extends VectorGroupByHashBytesKeySingleCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int keyColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  private transient VectorDeserializeRow<BinarySortableDeserializeRead> keyVectorDeserializeRow;
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashSerializeKeySingleCountTable() {
+    super();
+
+    keyColumnNum = -1;
+  }
+
+  public VectorGroupByHashSerializeKeySingleCountTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    TypeInfo[] typeInfos = new TypeInfo[] { groupByKeyExpressions[0].getOutputTypeInfo() };
+    keyVectorDeserializeRow =
+        new VectorDeserializeRow<BinarySortableDeserializeRead>(
+            new BinarySortableDeserializeRead(
+                typeInfos,
+                /* useExternalBuffer */ true));
+    // Single key is output column 0.
+    keyVectorDeserializeRow.init(new int[] { 0 });
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the key and count pairs of the serialized key single count hash table to the
+   * output.
+   */
+  protected void doOutputSerializeKeyAndCountPairs(
+      ColumnVector keyColumnVector,
+      LongColumnVector countColumnVector) throws HiveException {
+
+    boolean[] keyIsNull = keyColumnVector.isNull;
+    boolean[] countIsNull = countColumnVector.isNull;
+    long[] countVector = countColumnVector.vector;
+
+    // Use the iterator to race down the slot table array, pull the serialized key and the count
+    // out of each slot entry, and store them in the output batch.
+    int keyCount = initBytesKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) {
+        readNext();
+        keyIsNull[batchIndex] = false;
+        keyVectorDeserializeRow.setBytes(
+            getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength());
+
+        try {
+          // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc.
+          keyVectorDeserializeRow.deserializeByRef(outputBatch, batchIndex);
+        } catch (Exception e) {
+          throw new HiveException(
+              "\nDeserializeRead detail: " +
+                  keyVectorDeserializeRow.getDetailedReadPositionString(),
+              e);
+        }
+        countIsNull[batchIndex] = false;
+        countVector[batchIndex] = getCount();
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/duplicatereduction/VectorGroupByHashStringKeyDuplicateReductionTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/duplicatereduction/VectorGroupByHashStringKeyDuplicateReductionTable.java
new file mode 100644
index 0000000..c51f0db
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/duplicatereduction/VectorGroupByHashStringKeyDuplicateReductionTable.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey.duplicatereduction;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.duplicatereduction.VectorGroupByHashBytesKeyDuplicateReductionTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+
+/*
+ * A single string key hash table optimized for duplicate reduction Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashStringKeyDuplicateReductionTable
+    extends VectorGroupByHashBytesKeyDuplicateReductionTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int keyColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashStringKeyDuplicateReductionTable() {
+    super();
+
+    keyColumnNum = -1;
+  }
+
+  public VectorGroupByHashStringKeyDuplicateReductionTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the keys of the string key duplicate reduction hash table to the output.
+   */
+  protected void doOutputStringKeys(
+      BytesColumnVector keyColumnVector) throws HiveException {
+
+    boolean[] keyIsNull = keyColumnVector.isNull;
+
+    // Use the iterator to race down the slot table array, pull the bytes key out of
+    // each slot entry, and store it in the output batch.
+    int keyCount = initBytesKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) {
+        readNext();
+        keyIsNull[batchIndex] = false;
+        keyColumnVector.setRef(
+            batchIndex,
+            getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength());
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/singlecount/VectorGroupByHashStringKeySingleCountTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/singlecount/VectorGroupByHashStringKeySingleCountTable.java
new file mode 100644
index 0000000..3c281b6
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/groupby/hash/stringkey/singlecount/VectorGroupByHashStringKeySingleCountTable.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.groupby.hash.stringkey.singlecount;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.groupby.hash.byteskey.singlecount.VectorGroupByHashBytesKeySingleCountTable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+
+/*
+ * A single string key hash table optimized for single count Native Vectorized GroupBy.
+ */
+public abstract class VectorGroupByHashStringKeySingleCountTable
+    extends VectorGroupByHashBytesKeySingleCountTable {
+
+  private static final long serialVersionUID = 1L;
+
+  protected int keyColumnNum;
+
+  // The above members are initialized by the constructor and must not be
+  // transient.
+  //---------------------------------------------------------------------------
+
+  //---------------------------------------------------------------------------
+  // Pass-thru constructors.
+  //
+
+  public VectorGroupByHashStringKeySingleCountTable() {
+    super();
+
+    keyColumnNum = -1;
+  }
+
+  public VectorGroupByHashStringKeySingleCountTable(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
+    super(ctx, conf, vContext, vectorDesc);
+
+    keyColumnNum = groupByKeyExpressions[0].getOutputColumnNum();
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+  }
+
+  //------------------------------------------------------------------------------------------------
+
+  /**
+   * Flush all of the key and count pairs of the string key single count hash table to the
+   * output.
+   */
+  protected void doOutputStringKeyAndCountPairs(
+      BytesColumnVector keyColumnVector,
+      LongColumnVector countColumnVector) throws HiveException {
+
+    boolean[] keyIsNull = keyColumnVector.isNull;
+    boolean[] countIsNull = countColumnVector.isNull;
+    long[] countVector = countColumnVector.vector;
+
+    // Use the iterator to race down the slot table array, pull the bytes key and the count
+    // out of each slot entry, and store them in the output batch.
+    int keyCount = initBytesKeyIterator();
+    while (keyCount > 0) {
+      if (outputBatch.size == outputBatch.DEFAULT_SIZE) {
+        forwardOutputBatch(outputBatch);
+      }
+
+      int startBatchIndex = outputBatch.size;
+      int count = Math.min(keyCount, outputBatch.DEFAULT_SIZE - startBatchIndex);
+
+      for (int batchIndex = startBatchIndex; batchIndex < startBatchIndex + count; batchIndex++) {
+        readNext();
+        keyIsNull[batchIndex] = false;
+        keyColumnVector.setRef(
+            batchIndex,
+            getKeyBytes(), getKeyBytesOffset(), getKeyBytesLength());
+        countIsNull[batchIndex] = false;
+        countVector[batchIndex] = getCount();
+      }
+      outputBatch.size += count;
+      keyCount -= count;
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java
new file mode 100644
index 0000000..2eb311b
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keystore/VectorKeyStore.java
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.keystore;
+
+import org.apache.hadoop.hive.common.MemoryEstimate;
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
+
+/**
+ * Optimized for sequential key lookup.
+ */
+
+public class VectorKeyStore implements MemoryEstimate {
+
+  private WriteBuffers writeBuffers;
+
+  private WriteBuffers.Position unsafeReadPos; // Thread-unsafe position used at write time.
+
+  /**
+   * A store for arbitrary length keys in memory.
+   *
+   * The memory is an "infinite" byte array or WriteBuffers object.
+   *
+   * We give the client a 64-bit (long) key reference to keep; it holds the offset of the key
+   * within the "infinite" byte array.
+   *
+   * We optimize the common case when keys are short and store the key length in the key reference
+   * word.
+   *
+   * If the key is big, the big length will be encoded as an integer at the beginning of the key
+   * followed by the big key bytes.
+   */
+
+  /**
+   * Bit-length fields within a 64-bit (long) key reference.
+   *
+   * Lowest field: An absolute byte offset to the key in the WriteBuffers.
+   *
+   * Next field: For short keys, the length of the key. Otherwise, a special constant
+   * indicating a big key whose length is stored with the key.
+   *
+   * Last field: an always-on bit to ensure the key reference is non-zero when the offset and
+   * length are zero.
+   */
+
+  /*
+   * The absolute offset to the beginning of the key within the WriteBuffers.
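+   *
+   * With bitLength 40, offsets up to 2^40 - 1 are representable; maxSize backs down
+   * two bits to 2^38 bytes so the limit stays a power of two with headroom.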
+   */
+  private final class AbsoluteKeyOffset {
+    private static final int bitLength = 40;
+    private static final long allBitsOn = (((long) 1) << bitLength) - 1;
+    private static final long bitMask = allBitsOn;
+
+    // Make it a power of 2 by backing down (i.e. the -2).
+    private static final long maxSize = ((long) 1) << (bitLength - 2);
+  }
+
+  /*
+   * The small key length.
+   *
+   * If the key is big (i.e. length >= allBitsOn), then the key length is stored in the
+   * WriteBuffers.
+   */
+  private final class SmallKeyLength {
+    private static final int bitLength = 20;
+    private static final int allBitsOn = (1 << bitLength) - 1;
+    private static final int threshold = allBitsOn; // Lower this for big key testing.
+    private static final int bitShift = AbsoluteKeyOffset.bitLength;
+    private static final long bitMask = ((long) allBitsOn) << bitShift;
+    private static final long allBitsOnBitShifted = ((long) allBitsOn) << bitShift;
+  }
+
+  /*
+   * An always-on bit to ensure the key reference is non-zero.
+   */
+  private final class IsNonZeroFlag {
+    private static final int bitShift = SmallKeyLength.bitShift + SmallKeyLength.bitLength;
+    private static final long flagOnMask = ((long) 1) << bitShift;
+  }
+
+  public long add(byte[] keyBytes, int keyStart, int keyLength) {
+    boolean isKeyLengthBig = (keyLength >= SmallKeyLength.threshold);
+
+    long absoluteKeyOffset = writeBuffers.getWritePoint();
+    if (isKeyLengthBig) {
+      writeBuffers.writeVInt(keyLength);
+    }
+    writeBuffers.write(keyBytes, keyStart, keyLength);
+
+    long keyRefWord = IsNonZeroFlag.flagOnMask;
+    if (isKeyLengthBig) {
+      keyRefWord |= SmallKeyLength.allBitsOnBitShifted;
+    } else {
+      keyRefWord |= ((long) keyLength) << SmallKeyLength.bitShift;
+    }
+    keyRefWord |= absoluteKeyOffset;
+
+    // LOG.debug("VectorKeyStore add keyLength " + keyLength + " absoluteKeyOffset " + absoluteKeyOffset + " keyRefWord " + Long.toHexString(keyRefWord));
+    return keyRefWord;
+  }
+
+  /** THIS METHOD IS NOT THREAD-SAFE. Use only at load time (or be mindful of thread safety). */
+  public boolean unsafeEqualKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyLength) {
+    return equalKey(keyRefWord, keyBytes, keyStart, keyLength, unsafeReadPos);
+  }
+
+  public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyLength,
+      WriteBuffers.Position readPos) {
+
+    int storedKeyLength =
+        (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift);
+    boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn);
+
+    if (isKeyLengthSmall && storedKeyLength != keyLength) {
+      return false;
+    }
+    long absoluteKeyOffset =
+        (keyRefWord & AbsoluteKeyOffset.bitMask);
+
+    writeBuffers.setReadPoint(absoluteKeyOffset, readPos);
+    if (!isKeyLengthSmall) {
+      // Read the big key length we wrote with the key.
+      storedKeyLength = writeBuffers.readVInt(readPos);
+      if (storedKeyLength != keyLength) {
+        return false;
+      }
+    }
+
+    // Our reading is positioned to the key.
+    if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) {
+      // LOG.debug("VectorKeyStore equalKey no match on bytes");
+      return false;
+    }
+
+    return true;
+  }
+
+  public VectorKeyStore(int writeBuffersSize) {
+    writeBuffers = new WriteBuffers(writeBuffersSize, AbsoluteKeyOffset.maxSize);
+    unsafeReadPos = new WriteBuffers.Position();
+  }
+
+  public VectorKeyStore(WriteBuffers writeBuffers) {
+    // TODO: Check if maximum size compatible with AbsoluteKeyOffset.maxSize.
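+    // When the WriteBuffers instance is shared (e.g. with a map-join value store, as in
+    // VectorMapJoinFastBytesHashMap below), bytes written by the other store also consume
+    // the 40-bit absolute offset space.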
+    this.writeBuffers = writeBuffers;
+    unsafeReadPos = new WriteBuffers.Position();
+  }
+
+  public void clear() {
+    writeBuffers.clear();
+    unsafeReadPos.clear();
+  }
+
+  @Override
+  public long getEstimatedMemorySize() {
+    long size = 0;
+    size += writeBuffers == null ? 0 : writeBuffers.getEstimatedMemorySize();
+    size += unsafeReadPos == null ? 0 : unsafeReadPos.getEstimatedMemorySize();
+    return size;
+  }
+
+  /*
+   * Get a key from the store given a key reference.
+   * The private readPos makes the read safe for shared-memory usage.
+   */
+  public void getKey(long keyRefWord, ByteSegmentRef keyByteSegmentRef,
+      WriteBuffers.Position readPos) {
+
+    int storedKeyLength =
+        (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift);
+    boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn);
+
+    long absoluteKeyOffset =
+        (keyRefWord & AbsoluteKeyOffset.bitMask);
+
+    writeBuffers.setReadPoint(absoluteKeyOffset, readPos);
+    if (!isKeyLengthSmall) {
+      // Read the big key length we wrote with the key.
+      storedKeyLength = writeBuffers.readVInt(readPos);
+    }
+    writeBuffers.getByteSegmentRefToCurrent(keyByteSegmentRef, storedKeyLength, readPos);
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
index 57db136..d251aa5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java
@@ -23,6 +23,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -105,7 +106,7 @@ public VectorMapJoinFastBytesHashMap(
     valueStore = new VectorMapJoinFastValueStore(writeBuffersSize);
 
     // Share the same write buffers with our value store.
-    keyStore = new VectorMapJoinFastKeyStore(valueStore.writeBuffers());
+    keyStore = new VectorKeyStore(valueStore.writeBuffers());
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
index 726fd29..b284a83 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java
@@ -23,6 +23,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -57,10 +58,8 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength,
       slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength);
       slotTriples[tripleIndex + 1] = hashCode;
       slotTriples[tripleIndex + 2] = 1; // Count.
- // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. - // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2]++; } } @@ -95,7 +94,7 @@ public VectorMapJoinFastBytesHashMultiSet( int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize); + keyStore = new VectorKeyStore(writeBuffersSize); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 5d750a8..52801e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -21,6 +21,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; import org.apache.hadoop.io.BytesWritable; @@ -82,7 +83,7 @@ public VectorMapJoinFastBytesHashSet( int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - keyStore = new VectorMapJoinFastKeyStore(writeBuffersSize); + keyStore = new VectorKeyStore(writeBuffersSize); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index f2b794f..15dd125 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.exec.vector.keystore.VectorKeyStore; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashTable; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.WriteBuffers; @@ -40,7 +41,7 @@ private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastBytesHashTable.class); - protected VectorMapJoinFastKeyStore keyStore; + protected VectorKeyStore keyStore; protected BytesWritable testKeyBytesWritable; @@ -68,15 +69,13 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr int i = 0; boolean isNewKey; while (true) { - int tripleIndex = 3 * slot; + final int tripleIndex = 3 * slot; if (slotTriples[tripleIndex] == 0) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + 
tripleIndex + " empty"); isNewKey = true;; break; } if (hashCode == slotTriples[tripleIndex + 1] && keyStore.unsafeEqualKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing"); isNewKey = false; break; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 068f25e..8929b61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -93,6 +93,26 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeySingleCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeySingleCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashDecimal64KeySingleCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeySingleCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeySingleCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeySingleCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashMultiKeySingleCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSerializeKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSerializeKeySingleCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeyDuplicateReductionOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeySingleCountColumnOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeySingleCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSerializeKeySingleCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeySingleCountKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashLongKeySingleCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashSerializeKeySingleCountStarOperator; +import org.apache.hadoop.hive.ql.exec.vector.groupby.operator.gen.VectorGroupByHashStringKeySingleCountStarOperator; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -128,9 +148,13 @@ import org.apache.hadoop.hive.ql.plan.VectorDesc; import 
org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc; import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc; import org.apache.hadoop.hive.ql.plan.VectorPTFInfo; import org.apache.hadoop.hive.ql.plan.VectorPTFDesc.SupportedFunctionType; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation.SingleCountAggregationKind; import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc; import org.apache.hadoop.hive.ql.plan.VectorizationCondition; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; @@ -225,6 +249,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.mapred.InputFormat; @@ -304,6 +329,12 @@ private VectorizationEnabledOverride vectorizationEnabledOverride; boolean isTestForcedVectorizationEnable; + boolean isVectorizationGroupByNativeEnabled; + private VectorizationEnabledOverride vectorizationGroupByNativeEnabledOverride; + boolean isTestForcedVectorizationGroupByNativeEnable; + boolean weCanAttemptGroupByNativeVectorization; + int testGroupByMaxMemoryAvailable; + private boolean useVectorizedInputFileFormat; private boolean useVectorDeserialize; private boolean useRowDeserialize; @@ -2288,6 +2319,44 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE return physicalContext; } + // Native Vector GROUP BY. + isVectorizationGroupByNativeEnabled = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED); + + final String testVectorizationGroupByNativeOverrideString = + HiveConf.getVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_OVERRIDE); + vectorizationGroupByNativeEnabledOverride = + VectorizationEnabledOverride.nameMap.get(testVectorizationGroupByNativeOverrideString); + + isTestForcedVectorizationGroupByNativeEnable = false; + switch (vectorizationGroupByNativeEnabledOverride) { + case NONE: + weCanAttemptGroupByNativeVectorization = isVectorizationGroupByNativeEnabled; + break; + case DISABLE: + weCanAttemptGroupByNativeVectorization = false; + break; + case ENABLE: + weCanAttemptGroupByNativeVectorization = true; + isTestForcedVectorizationGroupByNativeEnable = !isVectorizationGroupByNativeEnabled; + + // Different parts of the code rely on this being set... + HiveConf.setBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED, true); + isVectorizationGroupByNativeEnabled = true; + break; + default: + throw new RuntimeException("Unexpected vectorization enabled override " + + vectorizationGroupByNativeEnabledOverride); + } + + testGroupByMaxMemoryAvailable = + HiveConf.getIntVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_GROUPBY_NATIVE_MAX_MEMORY_AVAILABLE); + + // Input Format control. 
     useVectorizedInputFileFormat =
         HiveConf.getBoolVar(hiveConf,
             HiveConf.ConfVars.HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT);
@@ -2505,7 +2574,10 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce,
       setOperatorIssue("DISTINCT not supported");
       return false;
     }
-    boolean ret = validateExprNodeDescNoComplex(desc.getKeys(), "Key");
+
+    // Allow Complex Type key expressions here because we may specialize.
+    // Later we will verify again.
+    boolean ret = validateExprNodeDesc(desc.getKeys(), "Key");
     if (!ret) {
       return false;
     }
@@ -3734,6 +3806,292 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi
     return result;
   }
 
+  public static Operator<? extends OperatorDesc> specializeGroupByOperator(
+      Operator<? extends OperatorDesc> op, VectorizationContext vContext,
+      GroupByDesc desc, VectorGroupByDesc vectorDesc)
+          throws HiveException {
+
+    VectorGroupByInfo vectorGroupByInfo = vectorDesc.getVectorGroupByInfo();
+
+    Operator<? extends OperatorDesc> vectorOp = null;
+    Class<? extends Operator<?>> opClass = null;
+
+    VectorGroupByInfo.HashTableKeyType hashTableKeyType =
+        vectorGroupByInfo.getHashTableKeyType();
+
+    AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation();
+    switch (aggregationVariation) {
+    case HASH_DUPLICATE_REDUCTION:
+      switch (hashTableKeyType) {
+      case LONG:
+        opClass = VectorGroupByHashLongKeyDuplicateReductionOperator.class;
+        break;
+      case DECIMAL_64:
+        opClass = VectorGroupByHashDecimal64KeyDuplicateReductionOperator.class;
+        break;
+      case STRING:
+        opClass = VectorGroupByHashStringKeyDuplicateReductionOperator.class;
+        break;
+      case SERIALIZE:
+        opClass = VectorGroupByHashSerializeKeyDuplicateReductionOperator.class;
+        break;
+      case MULTI_KEY:
+        opClass = VectorGroupByHashMultiKeyDuplicateReductionOperator.class;
+        break;
+      default:
+        throw new RuntimeException(
+            "Unexpected hash table type " + hashTableKeyType);
+      }
+      break;
+
+    case HASH_SINGLE_COUNT:
+      {
+        SingleCountAggregationKind singleCountAggregationKind =
+            vectorGroupByInfo.getSingleCountAggregation().getSingleCountAggregationKind();
+
+        switch (singleCountAggregationKind) {
+        case COUNT_STAR:
+          switch (hashTableKeyType) {
+          case LONG:
+            opClass = VectorGroupByHashLongKeySingleCountStarOperator.class;
+            break;
+          case DECIMAL_64:
+            opClass = VectorGroupByHashDecimal64KeySingleCountStarOperator.class;
+            break;
+          case STRING:
+            opClass = VectorGroupByHashStringKeySingleCountStarOperator.class;
+            break;
+          case SERIALIZE:
+            opClass = VectorGroupByHashSerializeKeySingleCountStarOperator.class;
+            break;
+          case MULTI_KEY:
+            opClass = VectorGroupByHashMultiKeySingleCountStarOperator.class;
+            break;
+          default:
+            throw new RuntimeException(
+                "Unexpected hash table type " + hashTableKeyType);
+          }
+          break;
+        case COUNT_KEY:
+          switch (hashTableKeyType) {
+          case LONG:
+            opClass = VectorGroupByHashLongKeySingleCountKeyOperator.class;
+            break;
+          case DECIMAL_64:
+            opClass = VectorGroupByHashDecimal64KeySingleCountKeyOperator.class;
+            break;
+          case STRING:
+            opClass = VectorGroupByHashStringKeySingleCountKeyOperator.class;
+            break;
+          case SERIALIZE:
+            opClass = VectorGroupByHashSerializeKeySingleCountKeyOperator.class;
+            break;
+          case MULTI_KEY:
+            opClass = VectorGroupByHashMultiKeySingleCountKeyOperator.class;
+            break;
+          default:
+            throw new RuntimeException(
+                "Unexpected hash table type " + hashTableKeyType);
+          }
+          break;
+        case COUNT_COLUMN:
+          switch (hashTableKeyType) {
+          case LONG:
+            opClass = VectorGroupByHashLongKeySingleCountColumnOperator.class;
+            break;
+          case DECIMAL_64:
VectorGroupByHashDecimal64KeySingleCountColumnOperator.class; + break; + case STRING: + opClass = VectorGroupByHashStringKeySingleCountColumnOperator.class; + break; + case SERIALIZE: + opClass = VectorGroupByHashSerializeKeySingleCountColumnOperator.class; + break; + case MULTI_KEY: + opClass = VectorGroupByHashMultiKeySingleCountColumnOperator.class; + break; + default: + throw new RuntimeException( + "Unexpected hash table type " + hashTableKeyType); + } + break; + default: + throw new RuntimeException( + "Unexpected single count aggregation kind " + singleCountAggregationKind); + } + } + break; + + default: + throw new RuntimeException("Unexpected aggregation variation " + aggregationVariation); + } + + vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + vectorDesc.setIsNative(true); + + vectorOp = OperatorFactory.getVectorOperator( + opClass, op.getCompilationOpContext(), desc, vContext, vectorDesc); + LOG.info("Vectorizer vectorizeOperator group by class " + vectorOp.getClass().getSimpleName()); + + return vectorOp; + } + + private boolean canSpecializeGroupBy(GroupByDesc desc, VectorGroupByDesc vectorDesc, + boolean isTezOrSpark, VectorizationContext vContext) throws HiveException { + + String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); + + VectorGroupByInfo vectorGroupByInfo = new VectorGroupByInfo(); + + List vectorizationIssueList = new ArrayList(); + + List keyDescs = desc.getKeys(); + final boolean isEmptyKey = keyDescs.isEmpty(); + final int outputKeyLength = keyDescs.size(); + + GroupByDesc.Mode groupByMode = desc.getMode(); + ProcessingMode processingMode = vectorDesc.getProcessingMode(); + + VectorExpression[] vecKeyExprs = vectorDesc.getKeyExpressions(); + final int vecKeyExprSize = vecKeyExprs.length; + + VectorAggregationDesc[] vecAggrDescs = vectorDesc.getVecAggrDescs(); + final int vecAggrDescSize = (vecAggrDescs == null ? 0 : vecAggrDescs.length); + + List aggrDescList = desc.getAggregators(); + + boolean isHash = (groupByMode == GroupByDesc.Mode.HASH); + final AggregationVariation aggregationVariation; + + SingleCountAggregation singleCountAggregation = null; + + if (isHash && + vecAggrDescSize == 0) { + + // No aggregations just means the key is being grouped. We are getting rid of duplicate keys. + + aggregationVariation = AggregationVariation.HASH_DUPLICATE_REDUCTION; + singleCountAggregation = null; + + } else if ( + isHash && + vecAggrDescSize == 1 && + aggrDescList.get(0).getGenericUDAFName().equalsIgnoreCase("count")) { + + // Single COUNT aggregation specialization. Store key and count in hash table without a + // hash element. 
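Elaborating the comment just above before the classification code continues: the three single-COUNT shapes differ only in their null handling, which is what lets each key type get its own COUNT_STAR, COUNT_KEY, and COUNT_COLUMN operator class. A hedged sketch of those semantics (plain arrays stand in for vectorized batches; this is not the patch's execution code):

public class SingleCountSemanticsSketch {
  // COUNT(*) counts rows; COUNT(key) skips null keys, whose null check is already
  // paid for by hashing; COUNT(col) must null-check an extra, non-key column.
  static long[] countVariants(Long[] keys, Long[] cols) {
    long star = 0, keyCount = 0, colCount = 0;
    for (int i = 0; i < keys.length; i++) {
      star++;                               // COUNT_STAR: unconditional
      if (keys[i] != null) { keyCount++; }  // COUNT_KEY
      if (cols[i] != null) { colCount++; }  // COUNT_COLUMN
    }
    return new long[] { star, keyCount, colCount };
  }

  public static void main(String[] args) {
    Long[] keys = { 1L, null, 1L };
    Long[] cols = { null, 2L, 3L };
    // Prints [3, 2, 2]: 3 rows, 2 non-null keys, 2 non-null column values.
    System.out.println(java.util.Arrays.toString(countVariants(keys, cols)));
  }
}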
+ + AggregationDesc countAggrDesc = aggrDescList.get(0); + List countParamList = countAggrDesc.getParameters(); + final int countParamSize = countParamList.size(); + if (countParamSize == 0) { + + // COUNT(*) + + aggregationVariation = AggregationVariation.HASH_SINGLE_COUNT; + singleCountAggregation = + new SingleCountAggregation(SingleCountAggregationKind.COUNT_STAR); + + } else if (countParamSize == 1) { + + aggregationVariation = AggregationVariation.HASH_SINGLE_COUNT; + + VectorAggregationDesc countVecAggrDesc = vecAggrDescs[0]; + + final int inputColumnNum = countVecAggrDesc.getInputExpression().getOutputColumnNum(); + + boolean isKey = false; + for (VectorExpression vecKeyExpr : vecKeyExprs) { + if (vecKeyExpr.getOutputColumnNum() == inputColumnNum) { + isKey = true; + break; + } + } + if (isKey) { + singleCountAggregation = + new SingleCountAggregation(SingleCountAggregationKind.COUNT_KEY); + } else { + singleCountAggregation = + new SingleCountAggregation(SingleCountAggregationKind.COUNT_COLUMN, inputColumnNum); + } + } else { + + aggregationVariation = AggregationVariation.NONE; + + vectorizationIssueList.add( + "Cannot specialize aggregation function " + countAggrDesc.getGenericUDAFName() + + " that has more than 1 input parameter"); + } + + } else { + + // FUTURE: More aggregations. + aggregationVariation = AggregationVariation.NONE; + } + + // TEMPORARY: Restriction + boolean isSingleColumnKey = (vecKeyExprSize == 1); + + final VectorGroupByInfo.HashTableKeyType hashTableKeyType; + if (isSingleColumnKey) { + ColumnVector.Type colVectorType = vecKeyExprs[0].getOutputColumnVectorType(); + switch (colVectorType) { + case LONG: + + // Integer family, date, interval year month. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.LONG; + break; + + case DECIMAL_64: + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.DECIMAL_64; + break; + + case BYTES: + + // String family. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.STRING; + break; + + default: + + // All other data types get serialized. + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.SERIALIZE; + break; + } + } else { + hashTableKeyType = VectorGroupByInfo.HashTableKeyType.MULTI_KEY; + } + + vectorGroupByInfo.setIsVectorizationGroupByNativeEnabled( + weCanAttemptGroupByNativeVectorization); + vectorGroupByInfo.setEngine(engine); + + vectorGroupByInfo.setVectorizationIssueList(vectorizationIssueList); + + vectorGroupByInfo.setAggregationVariation(aggregationVariation); + vectorGroupByInfo.setSingleCountAggregation(singleCountAggregation); + + vectorGroupByInfo.setHashTableKeyType(hashTableKeyType); + + vectorGroupByInfo.setTestGroupByMaxMemoryAvailable(testGroupByMaxMemoryAvailable); + + // So EXPLAIN VECTORIZATION can show native conditions, etc. + vectorDesc.setVectorGroupByInfo(vectorGroupByInfo); + + if (!weCanAttemptGroupByNativeVectorization || + !isTezOrSpark || + (aggregationVariation == AggregationVariation.NONE) || + groupByMode != GroupByDesc.Mode.HASH || + desc.isGroupingSetsPresent() || + vectorizationIssueList.size() > 0) { + return false; + } + + return true; + } + private Operator specializeReduceSinkOperator( Operator op, VectorizationContext vContext, ReduceSinkDesc desc, VectorReduceSinkDesc vectorDesc) throws HiveException { @@ -4247,7 +4605,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { } // No support for DECIMAL_64 input. We must convert. 
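The comment above refers to the two decimal layouts involved: DECIMAL_64 keeps a small-precision decimal as one scaled long, which the specialized hash group-by can use directly as a key, while the generic aggregation path expects the full decimal form. A rough illustration of the representational difference (BigDecimal stands in for HiveDecimalWritable; illustrative only, not the patch's conversion code):

import java.math.BigDecimal;

public class Decimal64Sketch {
  // DECIMAL_64 idea: a decimal(8,2) value such as 16966.99 is stored as the scaled long 1696699.
  static long toDecimal64(BigDecimal value, int scale) {
    return value.movePointRight(scale).longValueExact();
  }

  static BigDecimal fromDecimal64(long scaled, int scale) {
    return BigDecimal.valueOf(scaled, scale);
  }

  public static void main(String[] args) {
    long scaled = toDecimal64(new BigDecimal("16966.99"), 2);
    System.out.println(scaled);                    // 1696699: usable as a plain long hash key
    System.out.println(fromDecimal64(scaled, 2));  // 16966.99: the up-converted full decimal
  }
}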
- inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression); + inputExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(inputExpression, vContext); inputColVectorType = ColumnVector.Type.DECIMAL; // Fall through... @@ -4267,7 +4626,8 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { } // No support for DECIMAL_64 input. We must convert. - inputExpression = vContext.wrapWithDecimal64ToDecimalConversion(inputExpression); + inputExpression = + VectorizationContext.wrapWithDecimal64ToDecimalConversion(inputExpression, vContext); inputColVectorType = ColumnVector.Type.DECIMAL; // Fall through... @@ -4304,16 +4664,30 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { Operator groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { - ImmutablePair,String> pair = + String issue = + doVectorizeGroupByOperatorPreparation( + groupByOp, vContext, vectorGroupByDesc); + Preconditions.checkState(issue == null); + return doVectorizeGroupByOperator( groupByOp, vContext, vectorGroupByDesc); - return pair.left; + } + + private static Operator doVectorizeGroupByOperator( + Operator groupByOp, VectorizationContext vContext, + VectorGroupByDesc vectorGroupByDesc) + throws HiveException { + Operator vectorOp = + OperatorFactory.getVectorOperator( + groupByOp.getCompilationOpContext(), (GroupByDesc) groupByOp.getConf(), + vContext, vectorGroupByDesc); + return vectorOp; } /* * NOTE: The VectorGroupByDesc has already been allocated and will be updated here. */ - private static ImmutablePair,String> doVectorizeGroupByOperator( + public static String doVectorizeGroupByOperatorPreparation( Operator groupByOp, VectorizationContext vContext, VectorGroupByDesc vectorGroupByDesc) throws HiveException { @@ -4322,9 +4696,10 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { List keysDesc = groupByDesc.getKeys(); - // For now, we don't support group by on DECIMAL_64 keys. + // Allow DECIMAL_64 key expressions in preparation because we may specialize. + // Later we will verify again. VectorExpression[] vecKeyExpressions = - vContext.getVectorExpressionsUpConvertDecimal64(keysDesc); + vContext.getVectorExpressions(keysDesc); ArrayList aggrDesc = groupByDesc.getAggregators(); final int size = aggrDesc.size(); @@ -4335,7 +4710,7 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { ImmutablePair pair = getVectorAggregationDesc(aggDesc, vContext); if (pair.left == null) { - return new ImmutablePair, String>(null, pair.right); + return pair.right; } vecAggrDescs[i] = pair.left; @@ -4346,14 +4721,9 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { vectorGroupByDesc.setKeyExpressions(vecKeyExpressions); vectorGroupByDesc.setVecAggrDescs(vecAggrDescs); vectorGroupByDesc.setProjectedOutputColumns(projectedOutputColumns); - Operator vectorOp = - OperatorFactory.getVectorOperator( - groupByOp.getCompilationOpContext(), groupByDesc, - vContext, vectorGroupByDesc); - return new ImmutablePair, String>(vectorOp, null); - } - static int fake; + return null; // No issue. 
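The preparation method above ends by returning null to signal success; any non-null return is a human-readable reason the GROUP BY cannot be vectorized, which the caller records before giving up. A minimal sketch of that error-reporting convention with hypothetical names (prepare is not a method in the patch):

public class PrepareThenBuildSketch {
  // Illustrative mirror of the convention above: preparation returns a problem
  // description, or null when vectorization can proceed.
  static String prepare(boolean keysOk, boolean aggregatesOk) {
    if (!keysOk) {
      return "Cannot vectorize GROUP BY keys";
    }
    if (!aggregatesOk) {
      return "Cannot vectorize aggregation";
    }
    return null; // No issue: safe to construct the vectorized operator.
  }

  public static void main(String[] args) {
    String issue = prepare(true, false);
    if (issue != null) {
      System.out.println("not vectorized: " + issue); // caller records the reason
    }
  }
}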
+ } public static Operator vectorizeSelectOperator( Operator selectOp, VectorizationContext vContext, @@ -4902,23 +5272,51 @@ private static VectorPTFInfo createVectorPTFInfo(Operator,String> pair = - doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); - if (pair.left == null) { - setOperatorIssue(pair.right); + String issue = + doVectorizeGroupByOperatorPreparation(op, vContext, vectorGroupByDesc); + if (issue != null) { + setOperatorIssue(issue); throw new VectorizerCannotVectorizeException(); } - vectorOp = pair.left; - isNative = false; + + GroupByDesc groupByDesc = (GroupByDesc) op.getConf(); + boolean specialize = + canSpecializeGroupBy(groupByDesc, vectorGroupByDesc, isTezOrSpark, vContext); + + if (!specialize) { + + // Re-validate -- this time do not allow Complex Type keys. + boolean isNoComplexTypeKey = + validateExprNodeDescNoComplex(groupByDesc.getKeys(), "Key"); + if (!isNoComplexTypeKey) { + throw new VectorizerCannotVectorizeException(); + } + + // Regular VectorGroupByOperator does not support DECIMAL_64 keys. + VectorizationContext.upConvertDecimal64( + vectorGroupByDesc.getKeyExpressions(), vContext); + + vectorOp = + doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc); + isNative = false; + + } else { + + vectorOp = + specializeGroupByOperator(op, vContext, groupByDesc, vectorGroupByDesc); + isNative = true; + } if (vectorTaskColumnInfo != null) { VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions(); if (usesVectorUDFAdaptor(vecKeyExpressions)) { vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); - for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { - if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { - vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + if (vecAggrDescs != null) { + for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { + if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) { + vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index 31237c8..b1bcc11 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -20,8 +20,10 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc; @@ -31,7 +33,10 @@ import org.apache.hive.common.util.AnnotationUtils; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; - +import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.AggregationVariation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation; +import org.apache.hadoop.hive.ql.plan.VectorGroupByInfo.SingleCountAggregation.SingleCountAggregationKind; /** * GroupByDesc. @@ -324,26 +329,38 @@ public Object clone() { this.groupingSetPosition, this.isDistinct); } + // Use LinkedHashSet to give predictable display order. 
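The trailing comment above belongs to the Set declaration that follows it; the reason order matters is that the set's toString is rendered into EXPLAIN VECTORIZATION output (the "IN [tez, spark]" text visible in the q.out changes at the end of this patch), so iteration order must be deterministic or every expected output file churns. A quick illustration, not part of the patch:

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

public class PredictableOrderSketch {
  public static void main(String[] args) {
    // LinkedHashSet iterates in insertion order, so the rendered condition
    // string is stable across JVMs and runs: [tez, spark]
    Set<String> engines = new LinkedHashSet<String>(Arrays.asList("tez", "spark"));
    System.out.println("hive.execution.engine tez IN " + engines + " IS true");
    // A plain HashSet makes no ordering guarantee, which could flip the rendered
    // string to [spark, tez] and perturb every expected q.out file.
  }
}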
+ private static final Set vectorizableGroupByNativeEngines = + new LinkedHashSet(Arrays.asList("tez", "spark")); + public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { private final GroupByDesc groupByDesc; private final VectorGroupByDesc vectorGroupByDesc; + private final VectorGroupByInfo vectorGroupByInfo; + + private VectorizationCondition[] nativeConditions; public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, VectorGroupByDesc vectorGroupByDesc) { - // Native vectorization not supported. - super(vectorGroupByDesc, false); + super(vectorGroupByDesc, vectorGroupByDesc.isNative()); this.groupByDesc = groupByDesc; this.vectorGroupByDesc = vectorGroupByDesc; + vectorGroupByInfo = vectorGroupByDesc.getVectorGroupByInfo(); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "keyExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getKeysExpression() { return vectorExpressionsToStringList(vectorGroupByDesc.getKeyExpressions()); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "aggregators", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getAggregators() { + if (isNative) { + return null; + } VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs(); List vecAggrList = new ArrayList(vecAggrDescs.length); for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) { @@ -352,17 +369,20 @@ public GroupByOperatorExplainVectorization(GroupByDesc groupByDesc, return vecAggrList; } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProcessingMode() { return vectorGroupByDesc.getProcessingMode().name(); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getGroupByMode() { return groupByDesc.getMode().name(); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getVectorOutputConditionsNotMet() { List results = new ArrayList(); @@ -379,13 +399,109 @@ public String getGroupByMode() { return results; } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "projectedOutputColumnNums", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getProjectedOutputColumnNums() { return Arrays.toString(vectorGroupByDesc.getProjectedOutputColumns()); } + + private VectorizationCondition[] createNativeConditions() { + + boolean enabled = vectorGroupByInfo.getIsVectorizationGroupByNativeEnabled(); + + String engine = 
vectorGroupByInfo.getEngine(); + String engineInSupportedCondName = + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableGroupByNativeEngines; + boolean engineInSupported = vectorizableGroupByNativeEngines.contains(engine); + + final List vectorizationIssueList = vectorGroupByInfo.getVectorizationIssueList(); + + List conditionList = new ArrayList(); + conditionList.add( + new VectorizationCondition( + enabled, + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_NATIVE_ENABLED.varname)); + conditionList.add( + new VectorizationCondition( + engineInSupported, + engineInSupportedCondName)); + AggregationVariation aggregationVariation = vectorGroupByInfo.getAggregationVariation(); + conditionList.add( + new VectorizationCondition( + (aggregationVariation == AggregationVariation.HASH_SINGLE_COUNT || + aggregationVariation == AggregationVariation.HASH_DUPLICATE_REDUCTION), + "Single COUNT aggregation or Duplicate Reduction")); + conditionList.add( + new VectorizationCondition( + (vectorGroupByDesc.getProcessingMode() == ProcessingMode.HASH), + "Group By Mode HASH")); + conditionList.add( + new VectorizationCondition( + !groupByDesc.isGroupingSetsPresent(), + "No Grouping Sets")); + if (vectorizationIssueList.size() != 0) { + conditionList.add( + new VectorizationCondition( + true, + "Has issues \"" + + vectorizationIssueList.toString() + "\"")); + } + + VectorizationCondition[] conditions = + conditionList.toArray(new VectorizationCondition[0]); + + return conditions; + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences. + if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) { + return null; + } + + if (nativeConditions == null) { + nativeConditions = createNativeConditions(); + } + return VectorizationCondition.getConditionsMet(nativeConditions); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getNativeConditionsNotMet() { + + // For now, just report native conditions met / not met for HASH mode. + // It dramatically limits the number of Q file differences. 
+      if (vectorGroupByDesc.getProcessingMode() != ProcessingMode.HASH) {
+        return null;
+      }
+
+      if (nativeConditions == null) {
+        nativeConditions = createNativeConditions();
+      }
+      return VectorizationCondition.getConditionsNotMet(nativeConditions);
+    }
+
+    @Explain(vectorization = Vectorization.DETAIL, displayName = "singleCountAggregation",
+        explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public String getSingleCountAggregation() {
+      if (!isNative) {
+        return null;
+      }
+      final SingleCountAggregation singleCountAggregation =
+          vectorGroupByInfo.getSingleCountAggregation();
+      if (singleCountAggregation == null) {
+        // HASH_DUPLICATE_REDUCTION has no single COUNT aggregation to display.
+        return null;
+      }
+      final SingleCountAggregationKind singleCountAggregationKind =
+          singleCountAggregation.getSingleCountAggregationKind();
+      if (singleCountAggregationKind == SingleCountAggregationKind.NONE) {
+        return null;
+      }
+      return singleCountAggregationKind.name();
+    }
   }

-  @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "Group By Vectorization",
+      explainLevels = { Level.DEFAULT, Level.EXTENDED })
   public GroupByOperatorExplainVectorization getGroupByVectorization() {
     VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) getVectorDesc();
     if (vectorGroupByDesc == null) {
@@ -404,11 +520,14 @@ public static String getComplexTypeEnabledCondition(
   public static String getComplexTypeWithGroupByEnabledCondition(
       boolean isVectorizationComplexTypesEnabled,
       boolean isVectorizationGroupByComplexTypesEnabled) {
-    final boolean enabled = (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled);
+    final boolean enabled =
+        (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled);
     return "(" +
-        HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationComplexTypesEnabled +
+        HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " +
+        isVectorizationComplexTypesEnabled +
         " AND " +
-        HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationGroupByComplexTypesEnabled +
+        HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " +
+        isVectorizationGroupByComplexTypesEnabled +
         ") IS " + enabled;
   }

diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
index caf0c67..b7e60f7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
@@ -67,8 +67,12 @@
   private boolean isVectorizationComplexTypesEnabled;
   private boolean isVectorizationGroupByComplexTypesEnabled;

+  private boolean isNative;
+
+  private VectorGroupByInfo vectorGroupByInfo;
+
   public VectorGroupByDesc() {
-    this.processingMode = ProcessingMode.NONE;
+    processingMode = ProcessingMode.NONE;
+    isNative = false;
   }

   public void setProcessingMode(ProcessingMode processingMode) {
@@ -78,6 +82,14 @@ public ProcessingMode getProcessingMode() {
     return processingMode;
   }

+  public void setIsNative(boolean isNative) {
+    this.isNative = isNative;
+  }
+
+  public boolean isNative() {
+    return isNative;
+  }
+
   public void setKeyExpressions(VectorExpression[] keyExpressions) {
     this.keyExpressions = keyExpressions;
   }
@@ -118,6 +130,14 @@ public boolean getIsVectorizationGroupByComplexTypesEnabled() {
     return isVectorizationGroupByComplexTypesEnabled;
   }

+  public void setVectorGroupByInfo(VectorGroupByInfo vectorGroupByInfo) {
+    this.vectorGroupByInfo = vectorGroupByInfo;
+  }
+
+  public VectorGroupByInfo getVectorGroupByInfo() {
+    return vectorGroupByInfo;
+  }
+
   /**
    * Which ProcessingMode for VectorGroupByOperator?
    *
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java
new file mode 100644
index 0000000..54682b4
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByInfo.java
@@ -0,0 +1,184 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import com.google.common.base.Preconditions;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
+
+/**
+ * VectorGroupByInfo.
+ *
+ * A convenience data structure that has information needed to vectorize group by.
+ *
+ * It is created by the Vectorizer when it is determining whether it can specialize, so the
+ * information doesn't have to be recreated again and again by the VectorGroupByOperator's
+ * constructors and later during execution.
+ */
+public class VectorGroupByInfo {
+
+  private static final long serialVersionUID = 1L;
+
+  public static enum HashTableKeyType {
+    NONE,
+    LONG,
+    DECIMAL_64,
+    STRING,
+    SERIALIZE,
+    MULTI_KEY
+  }
+
+  //---------------------------------------------------------------------------
+
+  public static enum AggregationVariation {
+    NONE,
+    HASH_SINGLE_COUNT,
+    HASH_DUPLICATE_REDUCTION
+  }
+
+  public static class SingleCountAggregation {
+
+    public enum SingleCountAggregationKind {
+      NONE,
+      COUNT_STAR,
+      COUNT_KEY,
+      COUNT_COLUMN
+    }
+
+    private final SingleCountAggregationKind singleCountAggregationKind;
+    private final int countColumnNum;
+
+    public SingleCountAggregation(SingleCountAggregationKind singleCountAggregationKind) {
+      this.singleCountAggregationKind = singleCountAggregationKind;
+      countColumnNum = -1;
+    }
+
+    public SingleCountAggregation(SingleCountAggregationKind singleCountAggregationKind,
+        int countColumnNum) {
+      this.singleCountAggregationKind = singleCountAggregationKind;
+      this.countColumnNum = countColumnNum;
+    }
+
+    public SingleCountAggregationKind getSingleCountAggregationKind() {
+      return singleCountAggregationKind;
+    }
+
+    public int getCountColumnNum() {
+      return countColumnNum;
+    }
+  }
+
+  //---------------------------------------------------------------------------
+
+  private boolean isVectorizationGroupByNativeEnabled;
+  private String engine;
+
+  private List<String> vectorizationIssueList;
+
+  private AggregationVariation aggregationVariation;
+  private SingleCountAggregation singleCountAggregation;
+
+  private HashTableKeyType hashTableKeyType;
+
+  private int testGroupByMaxMemoryAvailable;
+
+  public VectorGroupByInfo() {
+    isVectorizationGroupByNativeEnabled = false;
+
+    vectorizationIssueList = null;
+
+    hashTableKeyType = HashTableKeyType.NONE;
+
+    testGroupByMaxMemoryAvailable = -1;
+  }
+
+  public boolean getIsVectorizationGroupByNativeEnabled() {
+    return isVectorizationGroupByNativeEnabled;
+  }
+
+  public void setIsVectorizationGroupByNativeEnabled(boolean isVectorizationGroupByNativeEnabled) {
+    this.isVectorizationGroupByNativeEnabled = isVectorizationGroupByNativeEnabled;
+  }
+
+  public String getEngine() {
+    return engine;
+  }
+
+  public void setEngine(String engine) {
+    this.engine = engine;
+  }
+
+  public List<String> getVectorizationIssueList() {
+    return vectorizationIssueList;
+  }
+
+  public void setVectorizationIssueList(List<String> vectorizationIssueList) {
+    this.vectorizationIssueList = vectorizationIssueList;
+  }
+
+  public void setAggregationVariation(AggregationVariation aggregationVariation) {
+    this.aggregationVariation = aggregationVariation;
+  }
+
+  public AggregationVariation getAggregationVariation() {
+    return aggregationVariation;
+  }
+
+  public void setSingleCountAggregation(SingleCountAggregation singleCountAggregation) {
+    this.singleCountAggregation = singleCountAggregation;
+  }
+
+  public SingleCountAggregation getSingleCountAggregation() {
+    return singleCountAggregation;
+  }
+
+  public HashTableKeyType getHashTableKeyType() {
+    return hashTableKeyType;
+  }
+
+  public void setHashTableKeyType(HashTableKeyType hashTableKeyType) {
+    this.hashTableKeyType = hashTableKeyType;
+  }
+
+  public int getTestGroupByMaxMemoryAvailable() {
+    return testGroupByMaxMemoryAvailable;
+  }
+
+  public void setTestGroupByMaxMemoryAvailable(int testGroupByMaxMemoryAvailable) {
+    this.testGroupByMaxMemoryAvailable = testGroupByMaxMemoryAvailable;
+  }
+}
diff --git ql/src/test/queries/clientpositive/vector_groupby_multikey.q ql/src/test/queries/clientpositive/vector_groupby_multikey.q
new file mode 100644
index 0000000..c91b026
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_groupby_multikey.q
@@ -0,0 +1,151 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.vectorized.execution.groupby.native.enabled=true;
+-- We want to create selectedInUse batches with WHERE expressions.
+SET hive.optimize.ppd=false;
+
+set hive.llap.io.enabled=true;
+set hive.llap.io.encode.enabled=true;
+
+-- SORT_QUERY_RESULTS
+
+
+
+CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt;
+CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt;
+
+-- Add a single NULL row that will come from ORC as isRepeated.
+insert into groupby_multi_1a values (NULL, NULL);
+
+-- And, a single non-NULL key already in the table plus one that isn't, as rows that will
+-- also come from ORC as isRepeated.
+insert into groupby_multi_1a values (date '2207-09-16', -13);
+insert into groupby_multi_1a values (date '2018-04-20', 18);
+
+CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt;
+CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt;
+
+insert into groupby_multi_1a_nonull values (date '2111-10-04', -81);
+insert into groupby_multi_1a_nonull values (date '2018-04-21', 19);
+
+
+
+-- *_multi_1a
+
+-- COUNT_KEY
+-- explain vectorization operator
+-- select key0, key1, count(key0, key1) from groupby_multi_1a group by key0, key1;
+-- select key0, key1, count(key0, key1) from groupby_multi_1a group by key0, key1;
+-- select key0, key1, count(key0, key1) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1;
+
+-- COUNT_STAR
+explain vectorization operator
+select key0, key1, count(*) from groupby_multi_1a group by key0, key1;
+select key0, key1, count(*) from groupby_multi_1a group by key0, key1;
+select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1;
+
+-- DUPLICATE_REDUCTION
+explain vectorization operator
+select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1;
+select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1;
+select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1;
+
+-- *_multi_1a_nonull
+
+-- COUNT_KEY
+-- select key0, key1, count(key0, key1) from groupby_multi_1a_nonull group by key0, key1;
+-- select key0, key1, count(key0, key1) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1;
+
+-- COUNT_STAR
+select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1;
+select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1;
+
+-- DUPLICATE_REDUCTION
+explain vectorization operator
+select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1;
+select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1;
+select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and
key1 != 16 group by key0, key1 order by key0, key1; + + + +------------------------------------------------------------------------------------------ + +CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k; + +-- MULTI-KEY: STRING, BOOLEAN +-- explain vectorization operator +-- select s, bo, count(s, bo) from over10k group by s, bo order by s, bo limit 10; +-- select s, bo, count(s, bo) from over10k group by s, bo order by s, bo limit 10; + +explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10; +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10; + +explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10; +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10; + +-- MULTI-KEY: TIMESTAMP, SMALLINT +-- explain vectorization operator +-- select ts, si, count(ts, si) from over10k group by ts, si order by ts, si limit 10; +-- select ts, si, count(ts, si) from over10k group by ts, si order by ts, si limit 10; + +explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10; +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10; + +explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10; +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10; + +-- MULTI-KEY: DECIMAL: BINARY +-- explain vectorization operator +-- select `dec`, bin, count(`dec`, bin) from over10k group by `dec`, bin order by `dec`, bin limit 10; +-- select `dec`, bin, count(`dec`, bin) from over10k group by `dec`, bin order by `dec`, bin limit 10; + +explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10; +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10; + +explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10; +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10; + + +set hive.test.vectorized.groupby.native.max.memory.available=1024; + +-- explain vectorization operator +-- select i, b, count(i, b) from over10k group by i, b order by i, b limit 10; +-- select i, b, count(i, b) from over10k group by i, b order by i, b limit 10; + +explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10; +select i, b, count(si) from over10k group by i, b order by i, b limit 10; + +explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10; +select i, b, count(*) from over10k group by i, b order by i, b limit 10; + +explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10; +select i, b from over10k group by i, b order by i, b limit 10; diff --git ql/src/test/queries/clientpositive/vector_groupby_singlekey.q ql/src/test/queries/clientpositive/vector_groupby_singlekey.q new file mode 100644 index 0000000..b68a4c6 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_groupby_singlekey.q @@ -0,0 +1,710 @@ +set hive.mapred.mode=nonstrict; +set 
hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.vectorized.execution.groupby.native.enabled=true;
+-- We want to create selectedInUse batches with WHERE expressions.
+SET hive.optimize.ppd=false;
+
+set hive.llap.io.enabled=true;
+set hive.llap.io.encode.enabled=true;
+
+-- SORT_QUERY_RESULTS
+
+
+
+CREATE TABLE groupby_long_1a_txt(key bigint)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt;
+CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt;
+
+-- Add a single NULL row that will come from ORC as isRepeated.
+insert into groupby_long_1a values (NULL);
+
+-- And, a single non-NULL key already in the table plus one that isn't, as rows that will
+-- also come from ORC as isRepeated.
+insert into groupby_long_1a values (-5206670856103795573);
+insert into groupby_long_1a values (800);
+
+CREATE TABLE groupby_long_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt;
+CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt;
+
+insert into groupby_long_1a_nonull values (-6187919478609154811);
+insert into groupby_long_1a_nonull values (1000);
+
+
+
+CREATE TABLE groupby_long_1b_txt(key smallint)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt;
+CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt;
+
+insert into groupby_long_1b values (NULL);
+
+insert into groupby_long_1b values (32030);
+insert into groupby_long_1b values (800);
+
+CREATE TABLE groupby_long_1b_nonull_txt(key smallint)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt;
+CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt;
+
+insert into groupby_long_1b_nonull values (31713);
+insert into groupby_long_1b_nonull values (34);
+
+
+
+CREATE TABLE groupby_long_1c_txt(key int, b_string string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt;
+CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt;
+
+insert into groupby_long_1c values (NULL, NULL);
+insert into groupby_long_1c values (NULL, 'TKTKGVGFW');
+insert into groupby_long_1c values (NULL, 'NEW');
+
+CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt;
+CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt;
+
+insert into groupby_long_1c_nonull values (1928928239, NULL);
+insert into groupby_long_1c_nonull values (9999, 'NEW');
+
+
+
+-- *_long_1a
+
+-- COUNT_KEY
+explain vectorization operator
+select key, count(key) from groupby_long_1a group by key;
+select key, count(key) from groupby_long_1a group by key;
+select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key;
+
+-- COUNT_STAR
+explain vectorization operator
+select key, count(*) from groupby_long_1a group
by key; +select key, count(*) from groupby_long_1a group by key; +select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1a group by key order by key; +select key from groupby_long_1a group by key order by key; +select key from groupby_long_1a where key != -8460550397108077433 group by key order by key; + +-- *_long_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1a_nonull group by key; +select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1a_nonull group by key; +select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key; +select key from groupby_long_1a_nonull group by key order by key; +select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key; + +-- *_long_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1b group by key; +select key, count(key) from groupby_long_1b group by key; +select key, count(key) from groupby_long_1b where key != 32030 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1b group by key; +select key, count(*) from groupby_long_1b group by key; +select key, count(*) from groupby_long_1b where key != 32030 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1b group by key order by key; +select key from groupby_long_1b group by key order by key; +select key from groupby_long_1b where key != -32030 group by key order by key; + +-- *_long_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_long_1b_nonull group by key; +select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key; + +-- COUNT_STAR +select key, count(*) from groupby_long_1b_nonull group by key; +select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key; +select key from groupby_long_1b_nonull group by key order by key; +select key from groupby_long_1b_nonull where key != -32030 group by key order by key; + +-- *_long_1c + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_long_1c group by key; +select key, count(key) from groupby_long_1c group by key; +select key, count(key) from groupby_long_1c where key != -1437463633 group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_long_1c group by key; +select key, count(*) from groupby_long_1c group by key; +select key, count(*) from groupby_long_1c where key != -1437463633 group by key; + +-- COUNT_COLUMN +explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key; +select key, count(b_string) from groupby_long_1c group by key; +select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_long_1c group by key order by key; +select key from groupby_long_1c group by key order by key; +select key from groupby_long_1c where key != -32030 group by key order by key; + +-- *_long_1c_nonull + 
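-- Illustrative note (not part of the generated test; table t below is hypothetical):
-- each table in this file is probed with the query shapes this patch specializes.
-- For a table t(key int, col string) the section labels map to these shapes:
--   select key, count(key) from t group by key;  -- COUNT_KEY: count non-null keys
--   select key, count(*) from t group by key;    -- COUNT_STAR: count all rows
--   select key, count(col) from t group by key;  -- COUNT_COLUMN: count non-null col values
--   select key from t group by key;              -- DUPLICATE_REDUCTION: distinct keys, no aggregate
-- The *_long_1c_nonull queries labeled above continue below.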
+-- COUNT_KEY
+select key, count(key) from groupby_long_1c_nonull group by key;
+select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key;
+
+-- COUNT_STAR
+select key, count(*) from groupby_long_1c_nonull group by key;
+select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key;
+
+-- COUNT_COLUMN
+select key, count(b_string) from groupby_long_1c_nonull group by key;
+select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key;
+
+-- DUPLICATE_REDUCTION
+explain vectorization operator
+select key from groupby_long_1c_nonull group by key order by key;
+select key from groupby_long_1c_nonull group by key order by key;
+select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key;
+
+
+set hive.llap.io.enabled=false;
+set hive.llap.io.encode.enabled=false;
+
+CREATE TABLE groupby_decimal64_1a(key decimal(6,3))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a;
+
+-- Add a single NULL row.
+insert into groupby_decimal64_1a values (NULL);
+
+-- And, a single non-NULL key already in the table plus one that isn't.
+insert into groupby_decimal64_1a values (333.33);
+insert into groupby_decimal64_1a values (800);
+
+CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull;
+
+insert into groupby_decimal64_1a_nonull values (-76.2);
+insert into groupby_decimal64_1a_nonull values (100);
+
+
+CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b;
+
+insert into groupby_decimal64_1b values (NULL, NULL);
+
+insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01);
+insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800);
+
+CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull;
+
+insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02);
+insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34);
+
+
+-- *_decimal64_1a
+
+-- COUNT_KEY
+select key, count(key) from groupby_decimal64_1a group by key;
+select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key;
+
+-- COUNT_STAR
+select key, count(*) from groupby_decimal64_1a group by key;
+select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key;
+
+-- DUPLICATE_REDUCTION
+explain vectorization detail
+select key from groupby_decimal64_1a group by key order by key;
+select key from groupby_decimal64_1a group by key order by key;
+select key from groupby_decimal64_1a where key != -0.342 group by key order by key;
+
+
+-- *_decimal64_1a_nonull
+
+-- COUNT_KEY
+select key, count(key) from groupby_decimal64_1a_nonull group by key;
+select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key;
+
+-- COUNT_STAR
+select key, count(*) from groupby_decimal64_1a_nonull group by key;
+select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key;
+
+-- DUPLICATE_REDUCTION
+explain vectorization detail
+select key from groupby_decimal64_1a_nonull group by key order by key;
+select key from groupby_decimal64_1a_nonull group by key order by key;
+select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key;
+
+
+-- *_decimal64_1b
+
+-- COUNT_KEY
+explain vectorization detail
+select key, count(key) from groupby_decimal64_1b group by key;
+select key, count(key) from groupby_decimal64_1b group by key;
+select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key;
+
+-- COUNT_STAR
+explain vectorization detail
+select key, count(*) from groupby_decimal64_1b group by key;
+select key, count(*) from groupby_decimal64_1b group by key;
+select key, count(*) from groupby_decimal64_1b where key != 11041.91 group by key;
+
+-- COUNT_COLUMN
+explain vectorization detail
+select key, count(c_timestamp) from groupby_decimal64_1b group by key;
+select key, count(c_timestamp) from groupby_decimal64_1b group by key;
+select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key;
+
+-- DUPLICATE_REDUCTION
+explain vectorization detail
+select key from groupby_decimal64_1b group by key order by key;
+select key from groupby_decimal64_1b group by key order by key;
+select key from groupby_decimal64_1b where key != 11041.91 group by key order by key;
+
+-- *_decimal64_1b_nonull
+
+-- COUNT_KEY
+select key, count(key) from groupby_decimal64_1b_nonull group by key;
+select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key;
+
+-- COUNT_STAR
+select key, count(*) from groupby_decimal64_1b_nonull group by key;
+select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key;
+
+-- COUNT_COLUMN
+select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key;
+select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key;
+
+-- DUPLICATE_REDUCTION
+explain vectorization detail
+select key from groupby_decimal64_1b_nonull group by key order by key;
+select key from groupby_decimal64_1b_nonull group by key order by key;
+select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key;
+
+set hive.llap.io.enabled=true;
+set hive.llap.io.encode.enabled=true;
+
+
+
+CREATE TABLE groupby_string_1a_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt;
+CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt;
+
+-- Add a single NULL row that will come from ORC as isRepeated.
+insert into groupby_string_1a values (NULL);
+
+-- And, a single non-NULL key already in the table plus one that isn't, as rows that will
+-- also come from ORC as isRepeated.
+insert into groupby_string_1a values ('QNCYBDW');
+insert into groupby_string_1a values ('NOT');
+
+CREATE TABLE groupby_string_1a_nonull_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt;
+CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt;
+
+insert into groupby_string_1a_nonull values ('PXLD');
+insert into groupby_string_1a_nonull values ('AA');
+
+-- Use same data as 1a.
+CREATE TABLE groupby_string_1b_txt(key char(4))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt;
+CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt;
+
+insert into groupby_string_1b values (NULL);
+
+insert into groupby_string_1b values ('QNCYBDW');
+insert into groupby_string_1b values ('NOT');
+
+CREATE TABLE groupby_string_1b_nonull_txt(key char(4))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt;
+CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt;
+
+insert into groupby_string_1b_nonull values ('PXLD');
+insert into groupby_string_1b_nonull values ('AA');
+
+CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt;
+CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt;
+
+insert into groupby_string_1c values (NULL, NULL, NULL);
+insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454');
+insert into groupby_string_1c values (NULL, '2018-04-11', NULL);
+
+insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL);
+insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454');
+insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL);
+
+insert into groupby_string_1c values ('BB', NULL, NULL);
+insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454');
+insert into groupby_string_1c values ('DD', '2018-04-14', NULL);
+
+CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt;
+CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt;
+
+insert into groupby_string_1c_nonull values ('SDA', NULL, NULL);
+insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454');
+insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL);
+
+insert into groupby_string_1c_nonull values ('EEE', NULL, NULL);
+insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888');
+insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL);
+
+-- *_string_1a
+
+-- COUNT_KEY
+explain vectorization operator
+select key, count(key) from groupby_string_1a group by key;
+select key, count(key) from groupby_string_1a group by key;
+select key, count(key) from groupby_string_1a where key != 'PXLD' group by key;
+
+-- COUNT_STAR
+explain vectorization operator
+select key, count(*) from groupby_string_1a group by key;
+select key, count(*) from groupby_string_1a group by key;
+select key, count(*) from groupby_string_1a where key != 'PXLD' group by key;
+
+-- DUPLICATE_REDUCTION
+explain vectorization operator
+select key from groupby_string_1a group by key order by key;
+select key from groupby_string_1a group by key order by key;
+select key from groupby_string_1a where key != 'PXLD' group by key order by key;
+
+-- *_string_1a_nonull
+
+-- 
COUNT_KEY +select key, count(key) from groupby_string_1a_nonull group by key; +select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1a_nonull group by key; +select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key; +select key from groupby_string_1a_nonull group by key order by key; +select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key; + +-- *_string_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1b group by key; +select key, count(key) from groupby_string_1b group by key; +select key, count(key) from groupby_string_1b where key != 'MXGD' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1b group by key; +select key, count(*) from groupby_string_1b group by key; +select key, count(*) from groupby_string_1b where key != 'MXGD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1b group by key order by key; +select key from groupby_string_1b group by key order by key; +select key from groupby_string_1b where key != 'MXGD' group by key order by key; + +-- *_string_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_string_1b_nonull group by key; +select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1b_nonull group by key; +select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key; +select key from groupby_string_1b_nonull group by key order by key; +select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key; + +-- *_string_1c + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_string_1c group by key; +select key, count(key) from groupby_string_1c group by key; +select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_string_1c group by key; +select key, count(*) from groupby_string_1c group by key; +select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_date +explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key; +select key, count(s_date) from groupby_string_1c group by key; +select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_timestamp +explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key; +select key, count(s_timestamp) from groupby_string_1c group by key; +select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1c group by key order by key; +select key from groupby_string_1c group by key order by key; +select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key; + +-- *_string_1c_nonull + +-- COUNT_KEY +select key, count(key) from groupby_string_1c_nonull group by key; +select key, count(key) 
from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_string_1c_nonull group by key; +select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_date +select key, count(s_date) from groupby_string_1c_nonull group by key; +select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- COUNT_COLUMN s_timestamp +select key, count(s_timestamp) from groupby_string_1c_nonull group by key; +select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key; +select key from groupby_string_1c_nonull group by key order by key; +select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key; + + + +CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt; +CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt; + +CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt; +CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt; + + +CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt; +CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt; + +CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt; +CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt; + + +-- *_serialize_1a + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key; +select key, count(key) from groupby_serialize_1a group by key; +select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key; +select key, count(*) from groupby_serialize_1a group by key; +select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1a group by key order by key; +select key from groupby_serialize_1a group by key order by key; +select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key; + +-- *_serialize_1a_nonull + +-- COUNT_KEY +select key, count(key) from groupby_serialize_1a_nonull group by key; +select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_serialize_1a_nonull group 
by key; +select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key; +select key from groupby_serialize_1a_nonull group by key order by key; +select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key; + +-- *_serialize_1b + +-- COUNT_KEY +explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key; +select key, count(key) from groupby_serialize_1b group by key; +select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_STAR +explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key; +select key, count(*) from groupby_serialize_1b group by key; +select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_smallint +explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key; +select key, count(c_smallint) from groupby_serialize_1b group by key; +select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_string +explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key; +select key, count(c_string) from groupby_serialize_1b group by key; +select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1b group by key order by key; +select key from groupby_serialize_1b group by key order by key; +select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key; + +-- *_serialize_1b_nonull + +-- COUNT_KEY +select key, count(key) from groupby_serialize_1b_nonull group by key; +select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_STAR +select key, count(*) from groupby_serialize_1b_nonull group by key; +select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_smallint +select key, count(c_smallint) from groupby_serialize_1b_nonull group by key; +select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- COUNT_COLUMN c_string +select key, count(c_string) from groupby_serialize_1b_nonull group by key; +select key, count(c_string) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key; + +-- DUPLICATE_REDUCTION +explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key; +select key from groupby_serialize_1b_nonull group by key order by key; +select key from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key order by key; + +------------------------------------------------------------------------------------------ + +CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k; + +-- STRING +explain 
vectorization operator +select s, count(s) from over10k group by s order by s limit 10; +select s, count(s) from over10k group by s order by s limit 10; + +explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10; +select s, count(ts) from over10k group by s order by s limit 10; + +explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10; +select s, count(*) from over10k group by s order by s limit 10; + +-- SERIALIZE TIMESTAMP +explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10; +select ts, count(ts) from over10k group by ts order by ts limit 10; + +explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10; +select ts, count(d) from over10k group by ts order by ts limit 10; + +explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10; +select ts, count(*) from over10k group by ts order by ts limit 10; + +-- SERIALIZE DECIMAL +explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10; + +explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10; + +explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10; +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10; + + +-- SMALL HASH TABLE +-- Cap the memory available to the native group-by hash table at 1024 bytes so the remaining queries run against a deliberately tiny table. +set hive.test.vectorized.groupby.native.max.memory.available=1024; + +explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10; +select i, count(i) from over10k group by i order by i limit 10; + +explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10; +select i, count(b) from over10k group by i order by i limit 10; + +explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10; +select i, count(*) from over10k group by i order by i limit 10; + +explain vectorization operator +select i from over10k group by i order by i limit 10; +select i from over10k group by i order by i limit 10; diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 89b7169..73f1e6c 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1497,6 +1497,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -1590,10 +1592,11 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -1604,10 +1607,11 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1] Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2284,6 +2288,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string, col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] Reduce Sink Vectorization: @@ -2377,10 +2383,11 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: @@ -2391,10 +2398,11 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1] Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] App Master Event Vectorization: diff --git ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out index 05d5d0a..d46ac5a 100644 --- ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out +++ ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out @@ -682,7 +682,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: struct) Statistics: Num rows: 613 Data size: 51492 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: llap @@ -718,7 +718,6 @@ POSTHOOK: query: select ROW__ID, count(*) from over10k_orc_bucketed group by ROW POSTHOOK: type: QUERY POSTHOOK: Input: default@over10k_orc_bucketed #### A masked pattern was here #### 
-NULL 6 PREHOOK: query: select ROW__ID, * from over10k_orc_bucketed where ROW__ID is null PREHOOK: type: QUERY PREHOOK: Input: default@over10k_orc_bucketed diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out index be8747a..21a2075 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out @@ -1274,52 +1274,52 @@ PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_orc PREHOOK: Input: default@over1k_part_orc@ds=foo/t=27 PREHOOK: Input: default@over1k_part_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) from over1k_part_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part_orc POSTHOOK: Input: default@over1k_part_orc@ds=foo/t=27 POSTHOOK: Input: default@over1k_part_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 38 PREHOOK: query: select count(*) from over1k_part_limit_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_limit_orc PREHOOK: Input: default@over1k_part_limit_orc@ds=foo/t=27 PREHOOK: Input: default@over1k_part_limit_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) from over1k_part_limit_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part_limit_orc POSTHOOK: Input: default@over1k_part_limit_orc@ds=foo/t=27 POSTHOOK: Input: default@over1k_part_limit_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 20 PREHOOK: query: select count(*) from over1k_part_buck_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_orc PREHOOK: Input: default@over1k_part_buck_orc@t=27 PREHOOK: Input: default@over1k_part_buck_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) from over1k_part_buck_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part_buck_orc POSTHOOK: Input: default@over1k_part_buck_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 34 PREHOOK: query: select count(*) from over1k_part_buck_sort_orc PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort_orc PREHOOK: Input: default@over1k_part_buck_sort_orc@t=27 PREHOOK: Input: default@over1k_part_buck_sort_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) from over1k_part_buck_sort_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part_buck_sort_orc POSTHOOK: Input: default@over1k_part_buck_sort_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_sort_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 34 PREHOOK: query: create table over1k_part2_orc( si smallint, @@ -1938,13 +1938,13 @@ PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part2_orc PREHOOK: Input: default@over1k_part2_orc@ds=foo/t=27 PREHOOK: Input: default@over1k_part2_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS 
PATH ### POSTHOOK: query: select * from over1k_part2_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part2_orc POSTHOOK: Input: default@over1k_part2_orc@ds=foo/t=27 POSTHOOK: Input: default@over1k_part2_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 405 65536 4294967508 82.24 foo 27 457 65570 4294967464 81.58 foo 27 256 65599 4294967383 89.55 foo 27 @@ -1969,13 +1969,13 @@ PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part2_orc PREHOOK: Input: default@over1k_part2_orc@ds=foo/t=27 PREHOOK: Input: default@over1k_part2_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) from over1k_part2_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part2_orc POSTHOOK: Input: default@over1k_part2_orc@ds=foo/t=27 POSTHOOK: Input: default@over1k_part2_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 19 PREHOOK: query: insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY @@ -2079,13 +2079,13 @@ PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part2_orc PREHOOK: Input: default@over1k_part2_orc@ds=foo/t=27 PREHOOK: Input: default@over1k_part2_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from over1k_part2_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part2_orc POSTHOOK: Input: default@over1k_part2_orc@ds=foo/t=27 POSTHOOK: Input: default@over1k_part2_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 405 65536 4294967508 82.24 foo 27 457 65570 4294967464 81.58 foo 27 256 65599 4294967383 89.55 foo 27 @@ -2110,13 +2110,13 @@ PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part2_orc PREHOOK: Input: default@over1k_part2_orc@ds=foo/t=27 PREHOOK: Input: default@over1k_part2_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) from over1k_part2_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part2_orc POSTHOOK: Input: default@over1k_part2_orc@ds=foo/t=27 POSTHOOK: Input: default@over1k_part2_orc@ds=foo/t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 19 PREHOOK: query: create table over1k_part_buck_sort2_orc( si smallint, @@ -2451,13 +2451,13 @@ PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc PREHOOK: Input: default@over1k_part_buck_sort2_orc@t=27 PREHOOK: Input: default@over1k_part_buck_sort2_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from over1k_part_buck_sort2_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part_buck_sort2_orc POSTHOOK: Input: default@over1k_part_buck_sort2_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_sort2_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 329 65778 4294967451 6.63 27 367 65675 4294967518 12.32 27 278 65622 4294967516 25.67 27 @@ -2538,13 +2538,13 @@ PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc PREHOOK: Input: 
default@over1k_part_buck_sort2_orc@t=27 PREHOOK: Input: default@over1k_part_buck_sort2_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) from over1k_part_buck_sort2_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part_buck_sort2_orc POSTHOOK: Input: default@over1k_part_buck_sort2_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_sort2_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 19 PREHOOK: query: insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -2665,13 +2665,13 @@ PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc PREHOOK: Input: default@over1k_part_buck_sort2_orc@t=27 PREHOOK: Input: default@over1k_part_buck_sort2_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select * from over1k_part_buck_sort2_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part_buck_sort2_orc POSTHOOK: Input: default@over1k_part_buck_sort2_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_sort2_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 329 65778 4294967451 6.63 27 367 65675 4294967518 12.32 27 278 65622 4294967516 25.67 27 @@ -2750,11 +2750,11 @@ PREHOOK: type: QUERY PREHOOK: Input: default@over1k_part_buck_sort2_orc PREHOOK: Input: default@over1k_part_buck_sort2_orc@t=27 PREHOOK: Input: default@over1k_part_buck_sort2_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: select count(*) from over1k_part_buck_sort2_orc POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k_part_buck_sort2_orc POSTHOOK: Input: default@over1k_part_buck_sort2_orc@t=27 POSTHOOK: Input: default@over1k_part_buck_sort2_orc@t=__HIVE_DEFAULT_PARTITION__ -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### 17 diff --git ql/src/test/results/clientpositive/llap/llap_partitioned.q.out ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index 799062e..cfbed1b 100644 --- ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1721,10 +1721,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 10:tinyint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: tinyint) @@ -1748,7 +1749,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -2106,6 +2107,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index 2c13d5d..b7fdb56 100644 --- ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -73,6 +73,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -252,6 +254,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/mergejoin.q.out ql/src/test/results/clientpositive/llap/mergejoin.q.out index 96be039..dc2952c 100644 --- ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -127,6 +127,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -2283,6 +2285,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -3428,6 +3432,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out index 4cd56f8..be235e1 100644 --- 
ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out @@ -245,6 +245,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out index f4d6218..c5802bb 100644 --- ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out @@ -78,11 +78,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -154,11 +154,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -221,6 +221,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -333,11 +335,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2144 2145 2142 2143 2140 2141 @@ -366,11 +368,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: 
default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -442,11 +444,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -509,6 +511,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -621,11 +625,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2146 2147 2144 2145 2142 2143 @@ -654,11 +658,11 @@ POSTHOOK: Lineage: parquet_complex_types.st1 SIMPLE [(parquet_complex_types_stag PREHOOK: query: select count(*) from parquet_complex_types PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_complex_types POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: explain vectorization expression select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY @@ -730,11 +734,11 @@ STAGE PLANS: PREHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select l1, l1[0], l1[1], l1[listIndex], listIndex from parquet_complex_types limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### [100,101] 100 101 100 0 [102,103] 102 103 103 1 [104,105] 104 105 104 0 @@ -797,6 +801,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -909,11 +915,11 @@ STAGE PLANS: PREHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_complex_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(l1[0]), l1[1] from parquet_complex_types where l1[0] > 1000 group by l1[1] order by l1[1] desc limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_complex_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2148 2149 2146 2147 2144 2145 diff --git ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index a2bb0f3..3c41a3f 100644 --- ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -88,11 +88,11 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 @@ -167,12 +167,12 @@ PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456 stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -237,6 +237,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: string) @@ -350,12 +352,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' 
from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -386,22 +388,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -416,12 +418,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 @@ -452,22 +454,22 @@ POSTHOOK: Lineage: parquet_map_type.stringmap SIMPLE [(parquet_map_type_staging) PREHOOK: query: select count(*) from parquet_map_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_map_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 
PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select stringMap, intMap, doubleMap, stringMap['k2'], intMap[456], doubleMap[123.123], stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_map_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 {"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 {"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 @@ -482,12 +484,12 @@ PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1' from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_map_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 199 100.1 v100 1999 1000.1 v1000 2001 1001.1 v1001 diff --git ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out index 7249363..71ec45a 100644 --- ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out @@ -66,11 +66,11 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1023 PREHOOK: query: explain vectorization expression select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY @@ -142,11 +142,11 @@ STAGE PLANS: PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -160,11 +160,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 
POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -178,11 +178,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -245,6 +245,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -330,11 +332,11 @@ STAGE PLANS: PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -361,20 +363,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1024 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -388,11 +390,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked 
pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -406,11 +408,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -424,11 +426,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 @@ -455,20 +457,20 @@ POSTHOOK: Lineage: parquet_struct_type.st2 SIMPLE [(parquet_struct_type_staging) PREHOOK: query: select count(*) from parquet_struct_type PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from parquet_struct_type POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1025 PREHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1, st1.f1, st1.f2, st2, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### {"f1":1,"f2":"str1"} 1 str1 {"f1":2001,"f3":"str2001"} 2001 str2001 {"f1":2,"f2":"str2"} 2 str2 {"f1":2002,"f3":"str2002"} 2002 str2002 {"f1":3,"f2":"str3"} 3 str3 {"f1":2003,"f3":"str2003"} 2003 str2003 @@ -482,11 +484,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1, st2.f3 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 str2001 2 2002 str2002 3 2003 str2003 @@ -500,11 +502,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select st1.f1, st2.f1 from parquet_struct_type limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 2001 2 2002 3 2003 @@ -518,11 +520,11 @@ 
POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_struct_type -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select sum(st1.f1), st1.f1 from parquet_struct_type where st1.f1 > 500 group by st1.f1 order by st1.f1 limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_struct_type -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 501 501 502 502 503 503 diff --git ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out index c786ba3..081f6de 100644 --- ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out @@ -99,11 +99,11 @@ POSTHOOK: Output: default@parquet_types_staging PREHOOK: query: SELECT * FROM parquet_types_staging PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types_staging -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: SELECT * FROM parquet_types_staging POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types_staging -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a B4F3CAFDBEDD {"k1":"v1"} [101,200] {"c1":10,"c2":"a"} 2011-01-01 101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab 68692CCAC0BDE7 {"k2":"v2"} [102,200] {"c1":10,"c2":"d"} 2012-02-02 102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc B4F3CAFDBEDD {"k3":"v3"} [103,200] {"c1":10,"c2":"g"} 2013-03-03 @@ -258,7 +258,7 @@ GROUP BY ctinyint ORDER BY ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: SELECT ctinyint, MAX(cint), MIN(csmallint), @@ -270,7 +270,7 @@ GROUP BY ctinyint ORDER BY ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1 121 1 8 1.175 2.06216 2 119 1 7 1.21429 1.8 3 120 1 7 1.17143 1.8 @@ -352,11 +352,11 @@ STAGE PLANS: PREHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 1.0 5 1.1 5 1.2 4 @@ -440,11 +440,11 @@ STAGE PLANS: PREHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### a 1 ab 1 abc 1 @@ -541,11 +541,11 @@ STAGE PLANS: PREHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types -PREHOOK: Output: 
hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### B4F3CAFDBE 1 a 1 ab 1 @@ -638,11 +638,11 @@ STAGE PLANS: PREHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### abc 1 bcd 1 cde 1 @@ -742,11 +742,11 @@ STAGE PLANS: PREHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 2011-01-01 01:01:01.111111111 1 2012-02-02 02:02:02.222222222 1 2013-03-03 03:03:03.333333333 1 @@ -838,11 +838,11 @@ STAGE PLANS: PREHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary PREHOOK: type: QUERY PREHOOK: Input: default@parquet_types -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 68692CCAC0BDE7 10 1 68656C6C6F 1 diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index 1e090f0..f239122 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -286,6 +288,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -426,6 +430,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 
1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index c99ac8d..20b3278 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -85,6 +85,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 54216fa..04bddca 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -266,10 +266,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 6093beb..88d828c 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -162,10 +162,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -187,7 +187,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -366,10 +366,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -391,7 +391,7 @@ STAGE PLANS: 
inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -760,10 +760,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -785,7 +785,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1111,11 +1111,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1140,7 +1140,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1249,11 +1249,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1278,7 +1278,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1387,11 +1387,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1416,7 +1416,7 @@ STAGE PLANS: 
inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -1525,11 +1525,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1554,7 +1554,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index aabfc73..6384a63 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -175,6 +175,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -365,11 +367,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 10:binary - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bin (type: binary) @@ -394,7 +396,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index 861ae9a..93743a5 100644 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -149,6 +149,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) 
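The golden-file updates above and below all follow one rule: a hash-mode GROUP BY is planned on one of the new native VectorGroupByHash*Operator classes only when every listed condition holds (hive.vectorized.execution.groupby.native.enabled set, a tez/spark engine, Group By Mode HASH, no grouping sets, and either a single COUNT aggregation or no aggregation at all, the duplicate-reduction case). Any other aggregation mix keeps VectorGroupByOperator with native: false and records the failed check under nativeConditionsNotMet. A minimal sketch of queries that land on each side of the check; the table t(k BIGINT, v STRING) is hypothetical, not part of this patch:

    SET hive.vectorized.execution.groupby.native.enabled=true;

    -- Single COUNT(*) per key: eligible for the native path
    -- (e.g. VectorGroupByHashLongKeySingleCountStarOperator on a long key).
    EXPLAIN VECTORIZATION DETAIL
    SELECT k, COUNT(*) FROM t GROUP BY k;

    -- No aggregations: the duplicate-reduction case
    -- (e.g. VectorGroupByHashLongKeyDuplicateReductionOperator).
    EXPLAIN VECTORIZATION DETAIL
    SELECT DISTINCT k FROM t;

    -- More than one aggregation: stays on VectorGroupByOperator, native: false,
    -- reporting "Single COUNT aggregation or Duplicate Reduction IS false".
    EXPLAIN VECTORIZATION DETAIL
    SELECT k, COUNT(*), MAX(v) FROM t GROUP BY k;
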
diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out index 9a43659..ac02e9b 100644 --- ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -110,6 +110,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) @@ -306,6 +308,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index e8bb722..13557f7 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -78,6 +78,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -303,6 +305,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index f2277c1..08b85af 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -936,12 +936,13 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -962,7 +963,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1054,11 +1055,26 @@ STAGE PLANS: TableScan alias: orc_create_complex Statistics: Num rows: 13503 Data size: 29968544 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:str:string, 1:mp:map, 2:lst:array, 3:strct:struct, 4:val:string, 5:ROW__ID:struct] Select Operator expressions: lst (type: array), strct (type: struct) outputColumnNames: lst, strct + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [2, 3] Statistics: Num rows: 13503 Data size: 29968544 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByHashMultiKeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 2:array, col 3:struct + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] keys: lst (type: array), strct (type: struct) mode: hash outputColumnNames: _col0, _col1 @@ -1067,15 +1083,30 @@ STAGE PLANS: key expressions: _col0 (type: array), _col1 (type: struct) sort order: ++ Map-reduce partition columns: _col0 (type: array), _col1 (type: struct) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] Statistics: Num rows: 13503 Data size: 29968544 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [2, 3] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1157,13 +1188,14 @@ STAGE PLANS: Group By Operator aggregations: count(val) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_COLUMN keys: str (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -1188,7 +1220,7 @@ STAGE PLANS:
inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1300,13 +1332,14 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 6:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_COLUMN keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 @@ -1331,7 +1364,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1427,12 +1460,28 @@ STAGE PLANS: TableScan alias: orc_create_complex Statistics: Num rows: 13503 Data size: 46492296 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:str:string, 1:mp:map, 2:lst:array, 3:strct:struct, 4:val:string, 5:ROW__ID:struct] Select Operator expressions: str (type: string), mp (type: map), lst (type: array), strct (type: struct), val (type: string) outputColumnNames: str, mp, lst, strct, val + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4] Statistics: Num rows: 13503 Data size: 46492296 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(val) + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountColumnOperator + groupByMode: HASH + keyExpressions: col 0:string, col 1:map, col 2:array, col 3:struct + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_COLUMN keys: str (type: string), mp (type: map), lst (type: array), strct (type: struct) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -1441,16 +1490,31 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0, 1, 2, 3] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [4] Statistics: Num rows: 13503 Data size: 46492296 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type MAP not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index 90086ea..e6059fd 100644 --- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1265,10 +1265,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 16:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ws_order_number (type: int) @@ -1292,7 +1293,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1319,10 +1320,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_data_types.q.out ql/src/test/results/clientpositive/llap/vector_data_types.q.out index 8dd959e..63e0f94 100644 --- ql/src/test/results/clientpositive/llap/vector_data_types.q.out +++ ql/src/test/results/clientpositive/llap/vector_data_types.q.out @@ -381,6 +381,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index 902d137..64fc5ba 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ 
ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -88,6 +88,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -268,6 +270,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -482,6 +486,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -682,6 +688,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 50e4305..d9d2526 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -592,6 +592,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -1211,6 +1213,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index c6867f8..0ab5427 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -2304,6 +2304,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 
1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -3245,6 +3247,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3409,6 +3413,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -3655,6 +3661,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3786,6 +3794,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3913,12 +3923,13 @@ STAGE PLANS: Group By Operator aggregations: count(key) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:decimal(20,10)) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_COLUMN mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -3940,7 +3951,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6300,6 +6311,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode:
HASH projectedOutputColumnNums: [0, 1] keys: value (type: int) @@ -7247,6 +7260,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -7412,6 +7427,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: _col0 (type: int) @@ -7659,6 +7676,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7791,6 +7810,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7919,12 +7940,13 @@ STAGE PLANS: Group By Operator aggregations: count(key) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:decimal(15,3)) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_COLUMN mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE @@ -7947,7 +7969,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out index 73d04a9..6a1f546 100644 --- ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out @@ -140,10 +140,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 357388 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string - native: false +
native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) @@ -167,7 +168,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out index 127d8ad..7b2bfa5 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out @@ -146,6 +146,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 3bfbda0..9354ff7 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -60,6 +60,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -198,6 +200,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -362,6 +366,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -623,6 +629,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -978,6 +986,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -1013,6 +1023,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index 9a2f5d8..d7a9f4d 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -231,6 +233,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -390,6 +394,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -543,6 +549,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -696,6 +704,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -856,6 +866,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out index 6005fb2..d1ed08b 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -75,6 +75,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -277,6 +279,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -489,6 +493,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -603,13 +609,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: 
VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1 @@ -796,6 +803,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -910,13 +919,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1 @@ -1099,6 +1109,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1427,6 +1439,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1748,6 +1762,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1914,6 +1930,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions:
col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -1987,13 +2005,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1 @@ -2132,6 +2151,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index d8e6b3f..88916ac 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -82,6 +82,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -258,6 +260,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out index 3586eae..86a8993 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -90,6 +90,8 @@ STAGE PLANS: groupByMode: HASH
keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -252,6 +254,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -414,6 +418,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -576,6 +582,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -732,6 +740,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -884,10 +894,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -913,7 +924,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false 
vectorized: true rowBatchContext: @@ -1024,13 +1035,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -1055,7 +1067,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index b072ffc..7eff6d2 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -71,13 +71,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -102,7 +103,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -252,13 +253,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -283,7 +285,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -661,6 +663,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet:
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out index 74caa3f..9e1c651 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out @@ -83,6 +83,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -222,6 +224,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -387,6 +391,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out index b896193..c2496ff 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -84,6 +84,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -328,6 +330,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + 
nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -598,13 +602,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: a (type: string), b (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -629,7 +634,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out index 8da5735..47e7127 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -72,10 +72,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -101,7 +102,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -146,6 +147,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -264,10 +267,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction 
IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -293,7 +297,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -338,6 +342,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 2:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) @@ -483,10 +489,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string) @@ -512,7 +519,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -552,13 +559,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out index 6c4ae65..f163ff9 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: 
bigint) @@ -219,6 +221,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out index 80e073b..fa02dff 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -237,6 +239,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -405,6 +409,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -574,6 +580,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int), 0L (type: bigint) @@ -780,6 +788,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: 
int), value (type: int), 0L (type: bigint) @@ -941,6 +951,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1109,6 +1121,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1271,6 +1285,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -1474,10 +1490,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1503,7 +1520,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1630,10 +1647,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1659,7 +1677,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1788,10 +1806,11 @@ STAGE 
PLANS: Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int) @@ -1817,7 +1836,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1941,6 +1960,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2107,6 +2128,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2273,6 +2296,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) @@ -2434,6 +2459,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:int, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: int), value (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out index e67bca7..62a4832 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -76,6 +76,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint 
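A note on the pattern running through these hunks: a HASH-mode Group By is planned as a native vectorized operator only when every listed condition holds -- hive.vectorized.execution.groupby.native.enabled IS true, the engine is tez or spark, the operator performs either a single COUNT aggregation or pure duplicate reduction, the mode is HASH, and no grouping sets are present. Wherever grouping sets appear, nativeConditionsNotMet shows "No Grouping Sets IS false" and the plan stays on the plain VectorGroupByOperator. A minimal HiveQL sketch of that boundary, assuming a hypothetical table t(a string, b string):

  -- expected to plan native (single COUNT(*), HASH mode, no grouping sets)
  select a, b, count(*) from t group by a, b;

  -- expected to stay on plain VectorGroupByOperator: "No Grouping Sets IS false"
  select a, b, count(*) from t group by a, b grouping sets ((a), (a, b));

  -- disabling the flag should force the legacy operator in both cases
  set hive.vectorized.execution.groupby.native.enabled=false;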
native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -277,6 +279,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -478,6 +482,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: a (type: string), b (type: string), 0L (type: bigint) @@ -677,6 +683,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) @@ -868,10 +876,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: a (type: string) @@ -898,7 +907,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1051,13 +1060,14 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: 
HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR keys: _col0 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -1083,7 +1093,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index dc3363d..4d46fa8 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -74,6 +74,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, ConstantVectorExpression(val 0) -> 4:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: category (type: int), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index 98e6e54..9111ae6 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -89,6 +89,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -104,10 +106,11 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col1 (type: bigint) Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_multikey.q.out ql/src/test/results/clientpositive/llap/vector_groupby_multikey.q.out new file mode 100644 index 0000000..31f7ceb --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_multikey.q.out @@ -0,0 +1,2381 @@ +PREHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE 
groupby_multi_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a.key1 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 EXPRESSION [] +POSTHOOK: Lineage: groupby_multi_1a.key1 EXPRESSION [] +PREHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here 
#### +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: Lineage: groupby_multi_1a_nonull.key0 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a_nonull.key1 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 
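Worth calling out before the next plan in this file: the query select key0, key1 from groupby_multi_1a group by key0, key1 carries no aggregation at all, which is why it vectorizes as VectorGroupByHashMultiKeyDuplicateReductionOperator rather than a COUNT variant -- a GROUP BY with no aggregates only removes duplicate key tuples. A short equivalence sketch in HiveQL (both forms should plan the same duplicate-reduction Group By):

  select key0, key1 from groupby_multi_1a group by key0, key1;
  select distinct key0, key1 from groupby_multi_1a;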
+2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +NULL NULL 2 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashMultiKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 61 Data size: 3540 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 30 Data size: 1740 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 
+2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +NULL NULL +PREHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 
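One detail these result sets exercise: GROUP BY compares NULL keys as equal, so rows sharing the same NULL pattern collapse into a single group (hence single output lines such as "2207-09-16 NULL 2" and "NULL NULL 2" above, while the _nonull tables drop those groups). A minimal sketch, assuming a hypothetical one-column table n(k int) holding the values 1, NULL, NULL:

  select k, count(*) from n group by k;
  -- returns two groups, 1 -> 1 and NULL -> 2, not three rows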
+1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_multi_1a_nonull + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashMultiKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 55 Data size: 3240 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 27 Data size: 1590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull 
+#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string), ts (type: timestamp) + outputColumnNames: bo, s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS 
true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
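The count(ts) plan here and the count(*) plan later in this file differ only in the aggregate, and that difference selects the native operator: count(ts) must ignore rows whose ts is NULL, so it maps to VectorGroupByHashMultiKeySingleCountColumnOperator, while count(*) counts every row and maps to the COUNT_STAR variant. A small semantic sketch, assuming a hypothetical table x(s string, ts timestamp) where one of four rows in a group has a NULL ts:

  select s, count(*), count(ts) from x group by s;
  -- for that group: count(*) = 4, count(ts) = 3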
compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string) + outputColumnNames: bo, s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice 
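The count(*) variant selects VectorGroupByHashMultiKeySingleCountStarOperator rather than the SingleCountColumn operator used for count(ts): count(*) counts rows while count(ts) must skip NULL timestamps, so the two need different inner loops. Each on its own still satisfies the "Single COUNT aggregation" condition; asking for both counts in one query would not, and the Group By would stay on the existing VectorGroupByOperator. A sketch of that fallback shape (this combined query is illustrative, not part of the test):

    EXPLAIN VECTORIZATION OPERATOR
    SELECT s, bo, count(*), count(ts) FROM over10k GROUP BY s, bo;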
ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), d (type: double), ts (type: timestamp) + outputColumnNames: si, d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 44 Basic stats: 
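The timestamp/smallint pair exercises non-string key types through the same multi-key path. When the OPERATOR level shown in these plans is not enough, the most verbose level of the same statement prints per-column detail, including which vector columns feed the key expressions; sketch:

    EXPLAIN VECTORIZATION DETAIL
    SELECT ts, si, count(d) FROM over10k GROUP BY ts, si ORDER BY ts, si LIMIT 10;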
COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), ts (type: timestamp) + outputColumnNames: si, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: f (type: float), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: f, dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(f) + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash 
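In every plan in this file only the map-side HASH Group By becomes native; the reducer's MERGEPARTIAL Group By still reports className VectorGroupByOperator with native: false, and it is gated by the separate reduce-side flag the plans list. To isolate the map-side change, the reduce side can be switched off independently; sketch using a setting that already appears in these plans:

    SET hive.vectorized.execution.reduce.enabled=false;
    EXPLAIN VECTORIZATION OPERATOR
    SELECT `dec`, bin, count(f) FROM over10k GROUP BY `dec`, bin ORDER BY `dec`, bin LIMIT 10;
    SET hive.vectorized.execution.reduce.enabled=true;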
+ outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data 
size: 260 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
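Two details worth noting in the decimal/binary plans: the column name dec needs backticks, presumably because it collides with the DECIMAL type keyword, and DECIMAL_64 support is reported as removed under LLAP ("DECIMAL_64 disabled because LLAP is enabled"), so the key reaches the Group By as a regular decimal(4,2) vector rather than the 64-bit representation. A quick sanity check that the quoted column groups normally outside EXPLAIN:

    SELECT `dec`, count(*) FROM over10k GROUP BY `dec` ORDER BY `dec` LIMIT 5;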
hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values 
clariffication 1 +PREHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: si, i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(si) + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 
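The int/bigint case is the heaviest one for the hash side: in the sampled output every (i, b) pair has count 1, so the operator holds close to one hash entry per input row. A sketch for measuring how many distinct keys the map-side table must absorb:

    SELECT count(*) AS distinct_keys
    FROM (SELECT i, b FROM over10k GROUP BY i, b) t;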
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num 
rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, 
_col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashMultiKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 +65536 4294967307 +65536 4294967308 +65536 4294967312 +65536 4294967317 +65536 4294967320 +65536 
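"Duplicate Reduction" in the condition string is this no-aggregation case: a GROUP BY with no aggregates is semantically a DISTINCT, which is why VectorGroupByHashMultiKeyDuplicateReductionOperator carries no count column at all. The equivalent spelling, which should produce the same plan shape:

    SELECT DISTINCT i, b FROM over10k ORDER BY i, b LIMIT 10;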
4294967326 +65536 4294967334 +65536 4294967336 +65536 4294967338 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index d90ebf0..60691a8 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -268,10 +268,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -296,7 +297,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -467,10 +468,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -494,7 +496,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -763,6 +765,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_item_sk (type: int) @@ -829,6 +833,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: 1 (type: int) @@ -987,6 +993,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: 
HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index ef49d90..9f8feb1 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -327,6 +329,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -673,6 +677,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) @@ -708,6 +714,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string), val (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_singlekey.q.out ql/src/test/results/clientpositive/llap/vector_groupby_singlekey.q.out new file mode 100644 index 0000000..be73eb4 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_groupby_singlekey.q.out @@ -0,0 +1,11691 @@ +PREHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
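The vector_groupby_rollup1 changes above show the guard working in the other direction: with grouping sets present the operator stays native: false, and the explain now records why ("No Grouping Sets IS false"). A sketch of a query shape that stays on the existing operator for that reason (table and column names hypothetical):

    EXPLAIN VECTORIZATION OPERATOR
    SELECT key, val, count(1) FROM t1 GROUP BY key, val WITH ROLLUP;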
default@groupby_long_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SIMPLE [(groupby_long_1a_txt)groupby_long_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: 
default@groupby_long_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SIMPLE [(groupby_long_1a_nonull_txt)groupby_long_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a_nonull values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SIMPLE [(groupby_long_1b_txt)groupby_long_1b_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1b values (32030) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (32030) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: insert into 
groupby_long_1b values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SIMPLE [(groupby_long_1b_nonull_txt)groupby_long_1b_nonull_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b_nonull values (31713) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (31713) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b_nonull values (34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was 
here #### +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c.key SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS 
ORC AS SELECT * FROM groupby_long_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: Lineage: groupby_long_1c_nonull.b_string SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c_nonull.key SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + 
Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: 
Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY 
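Aside for readers of this golden file: the count(key) plan earlier in this file and the count(*) plan that follows differ only in the aggregate, yet they map to distinct native operators — VectorGroupByHashLongKeySingleCountKeyOperator versus VectorGroupByHashLongKeySingleCountStarOperator — and to different NULL semantics (count(key) skips the NULL key, yielding "NULL 0"; count(*) counts rows, yielding "NULL 2"). A minimal HiveQL cross-check sketch, assuming a session against the tables created above; it toggles the hive.vectorized.execution.groupby.native.enabled flag cited in the nativeConditionsMet lines purely to compare native and non-native results, and is not part of the recorded test output:

-- Hedged sketch: rerun both aggregates with the native GroupBy path off and
-- diff the rows against the native runs recorded in this file.
set hive.vectorized.execution.groupby.native.enabled=false;
select key, count(key) from groupby_long_1a group by key;  -- NULL key reported as 0
select key, count(*) from groupby_long_1a group by key;    -- NULL rows are counted
set hive.vectorized.execution.groupby.native.enabled=true;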
+POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +NULL +PREHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 
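Aside: the aggregation-free GROUP BY plans in this file (className VectorGroupByHashLongKeyDuplicateReductionOperator) show that pure duplicate reduction satisfies the "Single COUNT aggregation or Duplicate Reduction" condition just as a single COUNT does. A short sketch under that assumption; the min(key) variant is a hypothetical contrast that would be expected to fail the condition and fall back to the generic VectorGroupByOperator:

-- Hedged sketch: compare the operator class reported for each plan.
explain vectorization operator
select key from groupby_long_1a_nonull group by key;  -- duplicate reduction: native expected
explain vectorization operator
select key, count(key), min(key) from groupby_long_1a_nonull group by key;  -- two aggregates: native not expected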
+-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1a_nonull + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +1569543799237464101 
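Aside: for contrast with the fully native single-key plans recorded here, the vector_groupby_rollup1.q.out hunks earlier in this patch list "No Grouping Sets IS false" under nativeConditionsNotMet. A hedged sketch of a query that would be expected to stay on the non-native path for that reason, assuming the same session:

-- Hedged sketch: grouping sets (ROLLUP) keep GroupBy on the row-mode
-- vectorized path per the recorded unmet condition.
explain vectorization operator
select key, count(key) from groupby_long_1a group by key with rollup;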
+3313583664488247651 +968819023021777205 +PREHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +3313583664488247651 +968819023021777205 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + 
aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + 
expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data 
size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +NULL +PREHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + 
TableScan + alias: groupby_long_1b_nonull + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 28 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + 
value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: 
VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(b_string) from 
groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: key, b_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b_string) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 3008 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +9999 1 +NULL 3 +PREHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +9999 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +NULL +PREHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(key) from 
groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_long_1c_nonull + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was 
here #### +POSTHOOK: query: select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +PREHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 +1928928239 +PREHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: insert into groupby_decimal64_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1a values (333.33) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (333.33) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### 
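
A note on the decimal64 tables created here: the plans that follow annotate the key column as decimal(6,3)/DECIMAL_64 and pick VectorGroupByHashDecimal64Key* operator variants. DECIMAL_64 is Hive's fast path for decimals whose precision fits in 18 digits: the vectorized row batch carries each value as one scaled long rather than a HiveDecimal object, so grouping on a decimal(6,3) key hashes and compares plain longs. Below is a minimal sketch of the scaled-long encoding, assuming nothing beyond the JDK; Decimal64Sketch and toDecimal64 are illustrative names, not Hive's API.

    import java.math.BigDecimal;
    import java.math.RoundingMode;

    public class Decimal64Sketch {
      // decimal(6,3): at most 6 total digits, 3 of them after the point.
      static final int SCALE = 3;

      // A DECIMAL_64 column stores each value as its unscaled integer at the
      // column's fixed scale, e.g. 55.300 -> 55300L.
      static long toDecimal64(BigDecimal v) {
        return v.setScale(SCALE, RoundingMode.UNNECESSARY)
                .unscaledValue()
                .longValueExact();
      }

      public static void main(String[] args) {
        System.out.println(toDecimal64(new BigDecimal("55.3")));    // 55300
        System.out.println(toDecimal64(new BigDecimal("55.33")));   // 55330
        System.out.println(toDecimal64(new BigDecimal("-0.342")));  // -342
      }
    }

Because 55.3 and 55.33 scale to different longs (55300 vs 55330), they hash to different group-by keys, which is why the result sets below list 55.300 and 55.330 as distinct groups.
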
+PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp EXPRESSION [] +POSTHOOK: Lineage: groupby_decimal64_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +POSTHOOK: type: QUERY +POSTHOOK: Input: 
_dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY 
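
The three COUNT shapes exercised below (and earlier on the long-keyed tables) differ only in how they treat NULLs, which is exactly what the specialized single-count operators encode: count(key) skips rows whose key is NULL, so the NULL group reports 0; count(*) counts every row, so the NULL group reports how many NULL-keyed rows exist; count(c) counts non-NULL values of c within each group. A compact sketch of the per-row accumulation, assuming plain JDK collections; CountVariant and accumulate are illustrative names, and only the COUNT_KEY/COUNT_STAR/COUNT_COLUMN distinction mirrors the singleCountAggreation values printed in the plans below.

    import java.util.HashMap;
    import java.util.Map;

    public class CountSemanticsSketch {
      enum CountVariant { COUNT_KEY, COUNT_STAR, COUNT_COLUMN }

      // One running count per group-by key; a NULL key is itself a valid
      // group (HashMap permits a null key).
      static final Map<Object, Long> counts = new HashMap<>();

      static void accumulate(Object key, Object column, CountVariant variant) {
        long delta;
        switch (variant) {
          case COUNT_STAR:   delta = 1L; break;                         // count(*): every row
          case COUNT_KEY:    delta = (key == null) ? 0L : 1L; break;    // count(key): skip NULL keys
          case COUNT_COLUMN: delta = (column == null) ? 0L : 1L; break; // count(c): skip NULL c
          default: throw new AssertionError(variant);
        }
        counts.merge(key, delta, Long::sum);  // a delta of 0 still registers the group
      }
    }

This is why groupby_decimal64_1a below reports NULL 0 for count(key) but NULL 2 for count(*): its two NULL-keyed rows (one from the data file, one inserted above) count as rows but not as keys.
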
+PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:decimal(6,3)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(6,3) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 +-87.200 +0.000 +23.220 
+324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +NULL +PREHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + 
TableScan + alias: groupby_decimal64_1a_nonull + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 0:decimal(6,3)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(6,3) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: 
key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashDecimal64KeySingleCountKeyOperator + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_KEY + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:decimal(8,2), VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashDecimal64KeySingleCountStarOperator + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:decimal(8,2), VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail 
+select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: c_timestamp (type: timestamp), key (type: decimal(8,2)) + outputColumnNames: c_timestamp, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_timestamp) + Group By Vectorization: + className: VectorGroupByHashDecimal64KeySingleCountColumnOperator + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_COLUMN + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns:
KEY._col0:decimal(8,2), VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(8,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +NULL +PREHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### 
+POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b_nonull + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group 
By Operator + Group By Vectorization: + className: VectorGroupByHashDecimal64KeyDuplicateReductionOperator + groupByMode: HASH + keyExpressions: col 1:decimal(8,2)/DECIMAL_64 + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:decimal(8,2) + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(8,2)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_txt +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SIMPLE [(groupby_string_1a_txt)groupby_string_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SIMPLE [(groupby_string_1a_nonull_txt)groupby_string_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull 
+POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b +POSTHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b +POSTHOOK: Lineage: groupby_string_1b.key SIMPLE [(groupby_string_1b_txt)groupby_string_1b_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull_txt 
+POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SIMPLE [(groupby_string_1b_nonull_txt)groupby_string_1b_nonull_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1b_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_txt 
+PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_date SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: 
default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: 
query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: 
groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 0 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By 
Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 3 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 
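The groupby_string_1a runs above pin down the NULL semantics the native single-COUNT GroupBy operators must preserve: count(key) skips NULL key values, so the NULL group reports 0, while count(*) counts every row in the group, so the same NULL group reports 3. A minimal HiveQL sketch of that distinction follows, against a hypothetical table t(key string) that is not part of this test suite:

-- hypothetical table t(key string) holding the rows 'a', 'a', NULL
select key, count(key), count(*) from t group by key;
-- count(key) ignores NULL values while count(*) counts rows, so this returns:
-- a      2   2
-- NULL   0   1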
+PREHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 19 Data size: 3496 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +NULL +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +QNCYBDW +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key 
+PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1a_nonull + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 
Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +MXGDMBD +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 0 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was 
here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1144 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + 
featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +MXGD +NULL +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here 
#### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1b_nonull + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +MXGD +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +PXLD +QNCY +UA 
+WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 0 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 6 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A 
masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date) + outputColumnNames: key, s_date + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_date) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 11040 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5402 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 5 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_timestamp (type: timestamp) + outputColumnNames: key, s_timestamp + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_timestamp) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 4 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + 
native: true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 8464 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 4141 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +NULL +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### 
+POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: 
Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_string_1c_nonull + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashStringKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 41 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + 
vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a +POSTHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a +POSTHOOK: Lineage: groupby_serialize_1a.key SIMPLE 
[(groupby_serialize_1a_txt)groupby_serialize_1a_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: Lineage: groupby_serialize_1a_nonull.key SIMPLE [(groupby_serialize_1a_nonull_txt)groupby_serialize_1a_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b +POSTHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b +POSTHOOK: Lineage: 
groupby_serialize_1b.c_double SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_smallint SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_string SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.key SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_double SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_smallint SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_string SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.key SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage 
+ Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was 
here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + 
enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: 
groupby_serialize_1a + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 680 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 8 Data size: 320 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +NULL +PREHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from 
groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1a_nonull + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: 
explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 
(SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_smallint (type: smallint) + outputColumnNames: key, c_smallint + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_smallint) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from 
groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_string (type: string) + 
outputColumnNames: key, c_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_string) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 10304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 5042 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 
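A note on the zero counts in these result sets: the "0" rows and the "NULL 0"/"NULL 2" lines are ordinary SQL semantics rather than a vectorization artifact. count(*) counts every row in a group, count(col) skips rows whose col is NULL, and rows with a NULL grouping key collect into their own group. A minimal sketch, using a hypothetical scratch table t that is not part of this patch:

    -- hypothetical scratch table; illustrative only
    create table t (k timestamp, v smallint);
    insert into t values
      ('2001-01-01 00:00:00', null),
      ('2001-01-01 00:00:00', 7),
      (null, 3);
    -- count(*) counts every row per group; count(v) skips null v;
    -- null keys form their own group
    select k, count(*), count(v) from t group by k;
    -- expected: 2001-01-01 00:00:00   2   1
    --           NULL                  1   1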
+2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 47 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 900 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select 
key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +NULL +PREHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 
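A note on the plan just above: when the GROUP BY carries no aggregation at all (select key ... group by key), the native path switches from the SingleCount* classes to VectorGroupByHashSerializeKeyDuplicateReductionOperator, since the hash table only has to deduplicate keys; the nativeConditionsMet clause "Single COUNT aggregation or Duplicate Reduction" covers both shapes. A hedged sketch of how one might confirm which class is chosen, assuming a Tez/LLAP session and reusing this test's table (the set lines are the toggles the conditions reference):

    set hive.vectorized.execution.enabled=true;
    set hive.vectorized.execution.groupby.native.enabled=true;
    -- no aggregate: expect the DuplicateReduction variant
    explain vectorization operator
    select key from groupby_serialize_1b group by key;
    -- a single count: expect a SingleCount* variant instead
    explain vectorization operator
    select key, count(*) from groupby_serialize_1b group by key;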
+2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 
15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 
05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 
1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: groupby_serialize_1b_nonull + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 66 Data size: 2560 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 33 Data size: 1280 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:35.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 
07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, 
count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string), ts (type: timestamp) + outputColumnNames: s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL 
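Taken together, the over10k plans in this stretch of the file (including the count(*) and count(ts) group by ts variants just below) show how the native GroupBy class name encodes the chosen fast path: the key segment is StringKey for a single string key and SerializeKey for keys, such as timestamp, that go through generic key serialization; the aggregation segment is SingleCountStar for count(*), SingleCountColumn for count of a non-key column, SingleCountKey for count of the grouping key itself, and DuplicateReduction when there is no aggregate. (Separately, the empty result sets earlier for the filters comparing key against '22083-06-07 09:35:19.383' are most plausibly three-valued logic at work: if the five-digit-year literal does not cast to a timestamp, key != NULL evaluates to NULL for every row, so nothing passes the filter.) A sketch of the mapping as inferred from the plans in this file, stripped of the order by ... limit the tests add; the class names in the comments can be confirmed with explain vectorization operator:

    -- single string key, counting the key itself
    select s, count(s) from over10k group by s;    -- VectorGroupByHashStringKeySingleCountKeyOperator
    -- single string key, counting another column
    select s, count(ts) from over10k group by s;   -- VectorGroupByHashStringKeySingleCountColumnOperator
    -- single string key, count(*)
    select s, count(*) from over10k group by s;    -- VectorGroupByHashStringKeySingleCountStarOperator
    -- timestamp key goes through key serialization
    select ts, count(ts) from over10k group by ts; -- VectorGroupByHashSerializeKeySingleCountKeyOperator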
+ native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashStringKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory 
Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 
09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: d (type: double), ts (type: timestamp) + outputColumnNames: d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: 
VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + 
native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(dec) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(bin) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Limit + 
Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(i) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountKeyOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b) + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountColumnOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe 
for values IS true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashLongKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + 
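Note for readers of this golden file: every plan/result pair above is driven by the same toggle. A minimal HiveQL sketch for reproducing the count(*) case just shown — the set statements are illustrative session commands and an assumption on my part, not part of the .q file; the property name, query text, and operator class are taken verbatim from the plan output above:

    set hive.execution.engine=tez;
    set hive.vectorized.execution.groupby.native.enabled=true;
    explain vectorization operator
    select i, count(*) from over10k group by i order by i limit 10;
    -- Expected map-side fragment when all native conditions hold:
    --   className: VectorGroupByHashLongKeySingleCountStarOperator
    --   native: true
    -- Disabling the property should fall back to the generic
    -- VectorGroupByOperator (native: false), as the reduce-side
    -- MERGEPARTIAL operators in these plans still do.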
+PREHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByHashLongKeyDuplicateReductionOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: MERGE_PARTIAL + 
keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 +65537 +65538 +65539 +65540 +65541 +65542 +65543 +65544 +65545 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out index 79ca6d9..e9df667 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out @@ -73,12 +73,13 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_COLUMN mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -494,10 +495,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - 
className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 6:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -524,7 +526,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -565,12 +567,13 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:double) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_COLUMN mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -679,10 +682,11 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: 1 (type: int) @@ -709,7 +713,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -756,12 +760,13 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggreation: COUNT_STAR mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out index 6c6986e..2336ae1 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out +++ 
ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out @@ -55,7 +55,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct] @@ -66,7 +66,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -78,19 +78,20 @@ STAGE PLANS: keys: key (type: string) mode: final outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_COLUMN mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -99,7 +100,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0] - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs @@ -147,13 +148,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index ec3e2b8..2a21793 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -170,6 +170,8 @@ STAGE PLANS:
groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: s_store_id (type: string), 0L (type: bigint) @@ -301,6 +303,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index bb555df..99028c2 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -313,10 +313,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -342,7 +343,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 5fb8258..65cb5af 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -128,6 +128,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -278,6 +280,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -499,6 +503,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -710,6 +716,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out index dc8f47e..6b522bc 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out @@ -78,7 +78,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 17704e5..750cead 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -3389,9 +3389,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3405,7 +3406,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3502,9 +3503,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3518,7 +3520,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3617,9 +3619,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3633,7 +3636,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3727,9 +3730,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3743,7 +3747,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3845,9 +3849,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3861,7 +3866,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3928,9 +3933,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -3944,7 +3950,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -4041,9 +4047,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets 
IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4057,7 +4064,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -4151,9 +4158,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4167,7 +4175,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 2 @@ -4290,9 +4298,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4306,7 +4315,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4416,9 +4425,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4432,7 +4442,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4552,9 +4562,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4568,7 +4579,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4675,9 +4686,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator @@ -4691,7 +4703,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4796,9 +4808,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4812,7 +4825,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -4826,9 +4839,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4842,7 +4856,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4962,9 +4976,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -4978,7 +4993,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5105,9 +5120,10 @@ STAGE PLANS: className: VectorSelectOperator 
native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5121,7 +5137,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5233,9 +5249,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5249,7 +5266,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -5380,9 +5397,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5396,7 +5414,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -5529,9 +5547,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5545,7 +5564,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 5 @@ -5705,9 +5724,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator @@ -5721,7 +5741,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -5869,9 +5889,10 @@ STAGE PLANS: className: VectorSelectOperator native: true Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkStringOperator @@ -5885,7 +5906,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true @@ -6009,10 +6030,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6038,7 +6060,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6211,10 +6233,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6240,7 +6263,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6415,10 +6438,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6444,7 +6468,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6619,10 +6643,11 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) @@ -6648,7 +6673,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6826,10 +6851,11 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -6855,7 +6881,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6966,10 +6992,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -6995,7 +7022,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7173,10 +7200,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: 
VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -7202,7 +7230,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7377,10 +7405,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -7406,7 +7435,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7637,10 +7666,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -7666,7 +7696,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -7852,10 +7882,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -7882,7 +7913,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8109,10 +8140,11 @@ STAGE PLANS: Statistics: 
Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8138,7 +8170,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8321,10 +8353,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -8350,7 +8383,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8534,10 +8567,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8563,7 +8597,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8597,10 +8631,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8626,7 +8661,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + 
allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8834,10 +8869,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -8863,7 +8899,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9064,10 +9100,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9093,7 +9130,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9278,10 +9315,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9307,7 +9345,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9528,10 +9566,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9557,7 +9596,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9780,10 +9819,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9809,7 +9849,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10090,10 +10130,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10119,7 +10160,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10366,10 +10407,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -10395,7 +10437,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10529,10 +10571,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: 
_col0 (type: int) @@ -10558,7 +10601,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10732,10 +10775,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10761,7 +10805,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10937,10 +10981,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -10966,7 +11011,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11142,10 +11187,11 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) @@ -11171,7 +11217,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11350,10 +11396,11 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or 
Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -11379,7 +11426,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11490,10 +11537,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -11519,7 +11567,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11698,10 +11746,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -11727,7 +11776,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11903,10 +11952,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -11932,7 +11982,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12165,10 +12215,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -12194,7 +12245,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12381,10 +12432,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -12411,7 +12463,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12638,10 +12690,11 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -12667,7 +12720,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -12851,10 +12904,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -12880,7 +12934,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13064,10 +13118,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator 
+ className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13093,7 +13148,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13127,10 +13182,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13156,7 +13212,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13364,10 +13420,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13393,7 +13450,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13594,10 +13651,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13623,7 +13681,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13808,10 +13866,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: 
COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -13837,7 +13896,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14058,10 +14117,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -14087,7 +14147,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14310,10 +14370,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -14339,7 +14400,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -14622,10 +14683,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -14651,7 +14713,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true 
rowBatchContext: @@ -14899,10 +14961,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -14928,7 +14991,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15062,10 +15125,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -15091,7 +15155,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15265,10 +15329,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -15294,7 +15359,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15470,10 +15535,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -15499,7 +15565,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15675,10 +15741,11 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col1 (type: int), _col1 (type: int) @@ -15704,7 +15771,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -15883,10 +15950,11 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -15912,7 +15980,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16023,10 +16091,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16052,7 +16121,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16231,10 +16300,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true 
vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -16260,7 +16330,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16436,10 +16506,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16465,7 +16536,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16698,10 +16769,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16727,7 +16799,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -16914,10 +16986,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -16944,7 +17017,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17171,10 +17244,11 @@ STAGE PLANS: Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17200,7 +17274,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17384,10 +17458,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: string) @@ -17413,7 +17488,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17597,10 +17672,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17626,7 +17702,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17660,10 +17736,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17689,7 +17766,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17897,10 +17974,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - 
native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -17926,7 +18004,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18127,10 +18205,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18156,7 +18235,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18341,10 +18420,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18370,7 +18450,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18591,10 +18671,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18620,7 +18701,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18843,10 +18924,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + 
className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -18872,7 +18954,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -19155,10 +19237,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -19184,7 +19267,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -19432,10 +19515,11 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -19461,7 +19545,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 37821fb..e6c9937 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -42,10 +42,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -70,7 +71,7 @@ 
STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -141,10 +142,11 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -169,7 +171,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -314,10 +316,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -342,7 +345,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -415,10 +418,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 17:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) @@ -443,7 +447,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index 7a2cd54..e219595 100644 --- ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: 
VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -272,6 +274,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out index 0b645ab..81cb944 100644 --- ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vector_orc_nested_column_pruning.q.out @@ -1064,11 +1064,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 9:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: double) @@ -1093,7 +1093,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1173,12 +1173,27 @@ STAGE PLANS: alias: nested_tbl_1 Pruned Column Paths: s1.f3 Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 10] + selectExpressions: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>, VectorUDFStructField(col 9:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 9:struct<f4:int,f5:double>) -> 10:int Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + keyExpressions: col 8:struct<f4:int,f5:double> + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: struct<f4:int,f5:double>) mode: hash outputColumnNames: _col0, _col1 @@ -1187,16 +1202,23 @@ STAGE PLANS: key expressions: _col0 (type: struct<f4:int,f5:double>) sort order: + Map-reduce partition columns: _col0 (type: struct<f4:int,f5:double>) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1263,12 +1285,27 @@ STAGE PLANS: alias: nested_tbl_1 Pruned Column Paths: s1.f3 Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true Select Operator expressions: s1.f3 (type: struct<f4:int,f5:double>), s1.f3.f4 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [8, 10] + selectExpressions: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 8:struct<f4:int,f5:double>, VectorUDFStructField(col 9:struct<f4:int,f5:double>, col 0:int)(children: VectorUDFStructField(col 1:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, col 2:int) -> 9:struct<f4:int,f5:double>) -> 10:int Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) + Group By Vectorization: + className: VectorGroupByHashSerializeKeySingleCountColumnOperator + groupByMode: HASH + keyExpressions: col 8:struct<f4:int,f5:double> + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: struct<f4:int,f5:double>) mode: hash outputColumnNames: _col0, _col1 @@ -1277,16 +1314,23 @@ STAGE PLANS: key expressions: _col0 (type: struct<f4:int,f5:double>) sort order: + Map-reduce partition columns: _col0 (type: struct<f4:int,f5:double>) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 316 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: llap Reduce Vectorization: @@ -1750,10 +1794,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1468 Basic
stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 9:int, col 12:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: boolean) @@ -1777,7 +1822,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -2202,11 +2247,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 11:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 10:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2231,7 +2276,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2341,11 +2386,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 9:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2370,7 +2415,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2480,11 +2525,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 12:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 11:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2509,7 +2554,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false 
vectorized: true Reducer 2 @@ -2627,7 +2672,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + notVectorizedReason: exception: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo cannot be cast to org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo stack trace: org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getStructFieldIndex(VectorizationContext.java:889), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getGenericUDFStructField(VectorizationContext.java:877), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:858), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:754), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:742), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doVectorizeGroupByOperatorPreparation(Vectorizer.java:4698), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperator(Vectorizer.java:5272), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChild(Vectorizer.java:948), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChildren(Vectorizer.java:834), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperatorTree(Vectorizer.java:801), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.access$2300(Vectorizer.java:263), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:1989), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:1941), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapWork(Vectorizer.java:1914), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.convertMapWork(Vectorizer.java:1136), ... 
vectorized: false Reducer 2 Execution mode: llap @@ -2720,11 +2765,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 13:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 12:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -2749,7 +2794,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2867,7 +2912,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + notVectorizedReason: exception: java.lang.ClassCastException: org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo cannot be cast to org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo stack trace: org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getStructFieldIndex(VectorizationContext.java:889), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getGenericUDFStructField(VectorizationContext.java:877), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpression(VectorizationContext.java:858), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:754), org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.getVectorExpressions(VectorizationContext.java:742), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doVectorizeGroupByOperatorPreparation(Vectorizer.java:4698), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperator(Vectorizer.java:5272), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChild(Vectorizer.java:948), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.doProcessChildren(Vectorizer.java:834), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.validateAndVectorizeOperatorTree(Vectorizer.java:801), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer.access$2300(Vectorizer.java:263), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:1989), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapOperators(Vectorizer.java:1941), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.validateAndVectorizeMapWork(Vectorizer.java:1914), org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer$VectorizationDispatcher.convertMapWork(Vectorizer.java:1136), ... 
vectorized: false Reducer 2 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out index e4bc4f0..84040c2 100644 --- ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out @@ -147,6 +147,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 4901e83..5054a83 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -745,6 +745,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index a841d4c..dbe4d7e 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -320,6 +320,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_outer_join_no_keys.q.out ql/src/test/results/clientpositive/llap/vector_outer_join_no_keys.q.out new file mode 100644 index 0000000..a7f4eb8 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_outer_join_no_keys.q.out @@ -0,0 +1,409 @@ +PREHOOK: query: create temporary table foo(x int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create temporary table foo(x int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: insert into foo values(1),(2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@foo +POSTHOOK: query: insert into foo values(1),(2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.x SCRIPT [] +PREHOOK: query: create temporary table bar(y int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bar +POSTHOOK: query: create temporary table bar(y int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@bar +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 2' is a cross product +PREHOOK: query: explain vectorization detail +select count(*) from bar right outer join foo +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select count(*) from bar right outer join foo +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: bar + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:y:int, 1:ROW__ID:struct] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [] + dataColumns: y:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: foo + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:x:int, 1:ROW__ID:struct] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: + 0 + 1 {true} + keys: + 0 + 1 + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false + input vertices: + 0 Map 1 + Statistics: Num rows: 2 Data size: 10 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByHashMultiKeySingleCountStarOperator + groupByMode: HASH + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + singleCountAggregation: COUNT_STAR + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [] + dataColumns: x:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?]
in task 'Map 2' is a cross product +PREHOOK: query: -- = 2 + +select count(*) from bar right outer join foo +PREHOOK: type: QUERY +PREHOOK: Input: default@bar +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: -- = 2 + +select count(*) from bar right outer join foo +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bar +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +2 +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- = 2 + +explain vectorization detail +select count(*) from bar, foo +PREHOOK: type: QUERY +POSTHOOK: query: -- = 2 + +explain vectorization detail +select count(*) from bar, foo +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: bar + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:y:int, 1:ROW__ID:struct] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [] + dataColumns: y:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: foo + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:x:int, 1:ROW__ID:struct] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [] + dataColumns: x:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 2 Data size: 10 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: -- = 0 + +select count(*) from bar, foo +PREHOOK: type: QUERY +PREHOOK: Input: default@bar +PREHOOK: Input: default@foo +#### A masked pattern was here #### +POSTHOOK: query: -- = 0 + +select count(*) from bar, foo +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bar +POSTHOOK: Input: default@foo +#### A masked pattern was here #### +0 diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index b1209d9..1aaf26e 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -287,6 +287,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true 
+ nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -506,6 +508,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:decimal(15,2), col 1:decimal(15,2) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) @@ -1587,6 +1591,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1806,6 +1812,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:decimal(7,2), col 1:decimal(7,2) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: c1 (type: decimal(7,2)), c2 (type: decimal(7,2)) diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out index 068453f..cdbe49b 100644 --- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out @@ -116,6 +116,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -286,6 +288,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -456,6 +460,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -613,6 +619,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -770,6 +778,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 687b4af..096b8e6 100644 --- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -449,11 +449,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -478,7 +478,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1380,11 +1380,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -1409,7 +1409,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2335,11 +2335,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:timestamp - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_time (type: timestamp) @@ -2364,7 +2364,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: 
false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2874,11 +2874,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 2:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -2903,7 +2903,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3805,11 +3805,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:date - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) @@ -3834,7 +3834,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4760,11 +4760,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:timestamp - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_time (type: timestamp) @@ -4789,7 +4789,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out index 568549d..834f604 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: name 
(type: string), age (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out index 3f92327..e7d1440 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out @@ -67,6 +67,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out index 1ed694d..99dc4ae 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out @@ -102,10 +102,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out index 8fb0752..2a9ff12 100644 --- ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out +++ ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out @@ -119,6 +119,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -326,6 +328,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index 5b43765..ad81549 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out 
+++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -354,10 +354,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 20:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -382,7 +383,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 9859824..0ba1b70 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -2791,6 +2791,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -2934,6 +2936,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out index de30ca7..9b80dfd 100644 --- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out @@ -57,11 +57,11 @@ STAGE PLANS: Group By Operator aggregations: count(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 7:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashStringKeySingleCountColumnOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -86,7 +86,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out index c713303..ebfa58c 100644 --- 
ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -234,6 +234,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 1:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) @@ -442,6 +444,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 1:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) @@ -3811,6 +3815,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, col 2:string, col 5:int, col 7:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: p_name (type: string), p_mfgr (type: string), p_size (type: int), p_retailprice (type: double) @@ -4501,6 +4507,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) @@ -6055,6 +6063,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 1:string, col 5:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_name (type: string), p_size (type: int) diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out index 8dcb900..754928f 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -50,6 +50,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: key (type: string) @@ -293,6 +295,8 @@ STAGE PLANS: groupByMode: HASH 
keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: int) @@ -530,6 +534,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out index b2db5a5..01c582a 100644 --- ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -54,6 +54,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -235,6 +237,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -566,6 +570,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -747,6 +753,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1078,6 +1086,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1259,6 +1269,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1636,6 +1648,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_1.q.out ql/src/test/results/clientpositive/llap/vectorization_1.q.out index c87926c..fe144f6 100644 --- ql/src/test/results/clientpositive/llap/vectorization_1.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_1.q.out @@ -87,6 +87,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_12.q.out ql/src/test/results/clientpositive/llap/vectorization_12.q.out index 0ead6c4..08c5cc5 100644 --- ql/src/test/results/clientpositive/llap/vectorization_12.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_12.q.out @@ -111,6 +111,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out index d72c298..12f1fd8 100644 --- ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -113,6 +113,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -467,6 +469,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out index 7ae99a3..306dfa5 100644 --- ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -113,6 +113,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 31363df..a2e6adf 100644 --- ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -109,6 +109,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out index 59f2d10..fd067f5 100644 --- ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/llap/vectorization_2.q.out ql/src/test/results/clientpositive/llap/vectorization_2.q.out index 83833da..af8860b 100644 --- ql/src/test/results/clientpositive/llap/vectorization_2.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_2.q.out @@ -91,6 +91,8 @@ STAGE PLANS: className: VectorGroupByOperator 
groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_3.q.out ql/src/test/results/clientpositive/llap/vectorization_3.q.out index 3c502cd..4980355 100644 --- ql/src/test/results/clientpositive/llap/vectorization_3.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_3.q.out @@ -96,6 +96,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_4.q.out ql/src/test/results/clientpositive/llap/vectorization_4.q.out index a8cfa48..037bed2 100644 --- ql/src/test/results/clientpositive/llap/vectorization_4.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_4.q.out @@ -91,6 +91,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_5.q.out ql/src/test/results/clientpositive/llap/vectorization_5.q.out index 5124740..0d9306f 100644 --- ql/src/test/results/clientpositive/llap/vectorization_5.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_5.q.out @@ -84,6 +84,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out index 59f2d10..fd067f5 100644 --- ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -86,6 +86,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git 
ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 7be4d7d..33b246a 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -291,6 +291,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -491,10 +493,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -522,7 +525,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -651,10 +654,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 5:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint), cdouble (type: double) @@ -681,7 +685,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -892,6 +896,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out index e6427fa..97a924b 100644 --- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out @@ -44,6 +44,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: 
hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out index 80c7c0c..ec2f54f 100644 --- ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out @@ -70,15 +70,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_part - Statistics: Num rows: 200 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: (cdouble + 2.0D) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -103,13 +103,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 3a5c272..bf101d0 100644 --- ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -118,6 +118,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -380,6 +382,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false 
vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] mode: hash @@ -634,6 +638,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash @@ -867,6 +873,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash @@ -2202,6 +2210,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:smallint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col0 (type: smallint) @@ -2479,6 +2489,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: _col0 (type: double) @@ -2800,6 +2812,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:timestamp, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: timestamp), _col1 (type: string) @@ -3202,6 +3216,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: boolean) @@ -3436,10 +3452,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH 
IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3461,7 +3477,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3551,10 +3567,10 @@ STAGE PLANS: Group By Operator aggregations: count(i) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3576,7 +3592,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3738,10 +3754,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3763,7 +3779,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3853,10 +3869,10 @@ STAGE PLANS: Group By Operator aggregations: count(ctinyint) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:tinyint) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3878,7 +3894,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3968,10 +3984,10 @@ STAGE PLANS: Group By Operator aggregations: count(cint) Group By Vectorization: - aggregators: VectorUDAFCount(col 2:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ 
-3993,7 +4009,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4083,10 +4099,10 @@ STAGE PLANS: Group By Operator aggregations: count(cfloat) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:float) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4108,7 +4124,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4198,10 +4214,10 @@ STAGE PLANS: Group By Operator aggregations: count(cstring1) Group By Vectorization: - aggregators: VectorUDAFCount(col 6:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4223,7 +4239,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4313,10 +4329,10 @@ STAGE PLANS: Group By Operator aggregations: count(cboolean1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:boolean) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4338,7 +4354,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index aec161d..3284422 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -306,6 +306,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH 
projectedOutputColumnNums: [0, 1] mode: hash @@ -450,6 +452,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out index 8351192..f4d9cb8 100644 --- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out @@ -1262,6 +1262,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index f19e2ca..21844dc 100644 --- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -73,6 +73,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -200,10 +202,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -229,7 +232,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -282,6 +285,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out 
index 15b62c9..dc17a87 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -85,7 +85,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -308,7 +308,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -625,7 +625,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 6 @@ -669,7 +669,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1068,7 +1068,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1380,7 +1380,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1690,7 +1690,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1848,7 +1848,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2305,7 +2305,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2440,7 +2440,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2785,7 +2785,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -2943,7 +2943,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3055,7 +3055,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -3219,7 +3219,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] 
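Note: in vectorized_distinct_gby.q.out above, the inner GROUP BY on cint has keys but no aggregations, and it becomes VectorGroupByHashLongKeyDuplicateReductionOperator: with nothing to aggregate, the hash table only has to remember which keys were seen. A row-at-a-time sketch of that idea follows; the real operator is columnar and also handles nulls and flush thresholds, and every name here is illustrative.

import java.util.HashSet;
import java.util.Set;
import java.util.function.LongConsumer;

// Duplicate reduction over a single long-typed key: each distinct key is
// forwarded exactly once, and later duplicates are dropped.
public class LongKeyDuplicateReduction {
  private final Set<Long> seen = new HashSet<>();
  private final LongConsumer downstream; // stand-in for the child operator

  public LongKeyDuplicateReduction(LongConsumer downstream) {
    this.downstream = downstream;
  }

  public void process(long key) {
    if (seen.add(key)) {       // true only on the first occurrence
      downstream.accept(key);
    }
  }
}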
featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3361,7 +3361,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 6 @@ -3405,7 +3405,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4250,7 +4250,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 10 @@ -4308,7 +4308,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 7 @@ -4600,7 +4600,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -4644,7 +4644,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4776,7 +4776,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -4820,7 +4820,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -4864,7 +4864,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4986,7 +4986,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5045,7 +5045,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5164,7 +5164,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5208,7 +5208,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: 
false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5316,7 +5316,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5360,7 +5360,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5468,7 +5468,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5512,7 +5512,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5628,7 +5628,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5782,7 +5782,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -5826,7 +5826,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -5917,7 +5917,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -6060,7 +6060,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 3 @@ -6160,7 +6160,7 @@ STAGE PLANS: vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 @@ -6204,7 +6204,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -6248,7 +6248,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -6428,7 +6428,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 4 @@ -6536,7 +6536,7 @@ STAGE PLANS: 
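Note: the many allNative: false to true flips in vectorized_dynamic_partition_pruning.q.out follow from the meaning of the per-vertex summary: a Map or Reducer reports allNative only when every one of its vectorized operators executes natively, so converting the lone non-native GroupBy flips the whole vertex. A one-method sketch of that conjunction; VectorOp is an assumed stand-in interface, not Hive's API.

import java.util.List;

public class VertexSummary {
  interface VectorOp { boolean isNative(); } // assumed stand-in

  // The vertex-level flag is simply the AND over its vectorized operators.
  static boolean allNative(List<VectorOp> ops) {
    return ops.stream().allMatch(VectorOp::isNative);
  }
}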
vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Map 3 diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index 228bd9d..6508148 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -137,6 +137,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -371,6 +373,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -605,6 +609,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -840,6 +846,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -911,6 +919,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -1177,6 +1187,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -1205,6 +1217,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash @@ -1468,6 +1482,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 8abd234..6a78bc2 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -75,6 +75,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index f05e5c0..6c38796 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -206,6 +206,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -409,6 +411,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -612,6 +616,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index acb9126..e827b60 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -298,6 +298,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, 
Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: _col0 (type: tinyint) diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index ccf9aae..8a5fbdf 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -3542,6 +3542,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 46a2470..c4b425a 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -146,6 +146,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -371,6 +373,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -512,6 +516,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index c9dd434..b2d68c2 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -806,6 +806,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -933,6 +935,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1078,6 +1082,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/orc_struct_type_vectorization.q.out ql/src/test/results/clientpositive/orc_struct_type_vectorization.q.out index c67e8d1..be5e87e 100644 --- ql/src/test/results/clientpositive/orc_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/orc_struct_type_vectorization.q.out @@ -235,6 +235,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single Key Column IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out index 03488a1..477d84f 100644 --- ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out +++ ql/src/test/results/clientpositive/parquet_complex_types_vectorization.q.out @@ -211,6 +211,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -483,6 +485,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) @@ -755,6 +759,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out index 289909d..d63b69f 100644 --- ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out +++ 
ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out @@ -227,6 +227,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/parquet_struct_type_vectorization.q.out ql/src/test/results/clientpositive/parquet_struct_type_vectorization.q.out index ed9bb09..30e8769 100644 --- ql/src/test/results/clientpositive/parquet_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/parquet_struct_type_vectorization.q.out @@ -235,6 +235,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/parquet_vectorization_0.q.out index fbb78b1..d7bf728 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_0.q.out @@ -48,6 +48,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -214,6 +216,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -536,6 +540,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -702,6 +708,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1024,6 +1032,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS 
true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1190,6 +1200,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1558,6 +1570,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/parquet_vectorization_1.q.out index afada38..7be81ce 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_1.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_1.q.out @@ -81,6 +81,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/parquet_vectorization_12.q.out index c284977..a81666e 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_12.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_12.q.out @@ -105,6 +105,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/parquet_vectorization_13.q.out index 6dd6e3f..fd5b5d2 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_13.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_13.q.out @@ -107,6 +107,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS 
false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -437,6 +439,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/parquet_vectorization_14.q.out index c501fab..e53dead 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_14.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_14.q.out @@ -107,6 +107,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/parquet_vectorization_15.q.out index 39057d6..fd3ad6f 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_15.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_15.q.out @@ -103,6 +103,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/parquet_vectorization_16.q.out index cf06c91..1346464 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_16.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_16.q.out @@ -80,6 +80,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 
(type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/parquet_vectorization_2.q.out index 131797d..6db1475 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_2.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_2.q.out @@ -85,6 +85,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/parquet_vectorization_3.q.out index f98dea6..ac1d348 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_3.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_3.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/parquet_vectorization_4.q.out index 973e2bd..cc7bd49 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_4.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_4.q.out @@ -85,6 +85,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_5.q.out ql/src/test/results/clientpositive/parquet_vectorization_5.q.out index e20dcbf..602972c 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_5.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_5.q.out @@ -78,6 +78,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/parquet_vectorization_9.q.out index cf06c91..1346464 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_9.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_9.q.out @@ -80,6 +80,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: 
false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out index 8a81b34..0738381 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out @@ -229,6 +229,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -361,6 +363,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -616,6 +620,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index 7d23ba8..cba0c96 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -53,6 +53,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -232,6 +234,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -559,6 +563,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode 
HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -738,6 +744,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1065,6 +1073,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1244,6 +1254,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1617,6 +1629,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out index 010b19e..973ffac 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out @@ -86,6 +86,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out index fd2947d..175118b 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out @@ -110,6 +110,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH 
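Note: the spark/parquet_vectorization_*.q.out hunks above mirror the Tez ones: the engine test now passes (spark IN [tez, spark] IS true), yet native stays false because these queries compute several aggregates. Below is a hedged reading of the "Single COUNT aggregation or Duplicate Reduction" condition, inferred only from the plans in this diff; the authoritative check lives in the Hive vectorizer.

import java.util.List;

public class SingleCountOrDupReduction {
  // Qualifies when there are no aggregations at all (pure key de-duplication)
  // or exactly one aggregation and it is COUNT.
  static boolean qualifies(List<String> aggregateNames) {
    return aggregateNames.isEmpty()
        || (aggregateNames.size() == 1
            && "count".equalsIgnoreCase(aggregateNames.get(0)));
  }
}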
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out index bce1f8a..e462fc0 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -464,6 +466,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out index 03afcc1..063de59 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out index 126cfd0..3d8584e 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out @@ -108,6 +108,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 
6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out index 303702c..0c738c6 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out index 423d2e3..7b873ff 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out index 955f85c..40577f7 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out @@ -95,6 +95,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out index c3b5392..7b55772 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out 
ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out index 56c62c3..9559cfb 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out @@ -83,6 +83,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out index 303702c..0c738c6 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out index 6fd173a..2ec36f8 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out @@ -264,6 +264,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) @@ -429,10 +431,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) @@ -458,7 +461,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -585,10 +588,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: 
VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 5:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint), cdouble (type: double) @@ -614,7 +618,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -789,6 +793,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: cdouble (type: double) diff --git ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index 1916d25..6543e70 100644 --- ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -73,10 +73,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ds (type: string) @@ -101,7 +102,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -312,10 +313,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -332,7 +334,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -797,10 +799,11 @@ STAGE 
PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 3:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -817,7 +820,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1277,10 +1280,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 6:bigint - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: bigint) @@ -1297,7 +1301,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1559,10 +1563,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 4:decimal(10,0) - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: decimal(10,0)) @@ -1579,7 +1584,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1835,10 +1840,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -1855,7 +1861,7 @@ STAGE PLANS: 
inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -1898,10 +1904,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -1918,7 +1925,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -2537,10 +2544,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -2560,10 +2568,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -2580,7 +2589,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -3043,10 +3052,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -3063,7 +3073,7 @@ STAGE PLANS: 
inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -3528,10 +3538,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 3:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -3548,7 +3559,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -3802,10 +3813,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -3822,7 +3834,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -4494,10 +4506,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 4:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -4514,7 +4527,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -4805,10 +4818,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 2008-04-08) -> 5:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark 
IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: '2008-04-08' (type: string) @@ -4833,7 +4847,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -5237,10 +5251,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -5260,10 +5275,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -5280,7 +5296,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -5531,10 +5547,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -5551,7 +5568,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -5784,10 +5801,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark 
IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -5804,7 +5822,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6036,10 +6054,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -6056,7 +6075,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6290,10 +6309,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -6310,7 +6330,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6353,10 +6373,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -6373,7 +6394,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -6967,6 +6988,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN 
[tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7022,6 +7045,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7091,10 +7116,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7111,10 +7137,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7161,10 +7188,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7181,10 +7209,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7272,6 +7301,8 @@ STAGE PLANS: className: 
VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7327,6 +7358,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7457,10 +7490,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7515,10 +7549,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7604,6 +7639,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7659,6 +7696,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7728,10 +7767,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + 
nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7748,10 +7788,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7798,10 +7839,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7818,10 +7860,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -7909,6 +7952,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -7964,6 +8009,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8096,10 +8143,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: 
VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8154,10 +8202,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8244,6 +8293,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8299,6 +8350,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8368,10 +8421,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8388,10 +8442,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8438,10 +8493,11 
@@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8458,10 +8514,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8492,10 +8549,11 @@ STAGE PLANS: vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ds (type: string) @@ -8520,7 +8578,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8553,6 +8611,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8608,6 +8668,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -8738,10 +8800,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: 
HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8796,10 +8859,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8903,10 +8967,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -8923,7 +8988,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8979,6 +9044,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -9146,10 +9213,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 3:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -9166,7 +9234,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9223,6 +9291,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH 
native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -9382,10 +9452,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -9402,7 +9473,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9454,10 +9525,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -9474,7 +9546,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9547,6 +9619,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -9717,10 +9791,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -9740,10 +9815,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE 
Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 2:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -9760,7 +9836,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9815,6 +9891,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -9981,10 +10059,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -10001,7 +10080,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10057,6 +10136,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -10215,10 +10296,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 3:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -10235,7 +10317,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10292,6 +10374,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -10449,10 +10533,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashSerializeKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:double - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: double) @@ -10469,7 +10554,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10526,6 +10611,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -10671,10 +10758,11 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 2008-04-08) -> 5:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: '2008-04-08' (type: string) @@ -10699,7 +10787,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -10794,6 +10882,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -10955,10 +11045,11 @@ STAGE 
PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -10975,7 +11066,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11031,6 +11122,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -11222,6 +11315,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -11412,6 +11507,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -11551,10 +11648,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -11571,7 +11669,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11623,10 +11721,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: 
HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -11643,7 +11742,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -11716,6 +11815,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -11999,6 +12100,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -12136,6 +12239,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -12191,6 +12296,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -12260,10 +12367,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12280,10 +12388,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 
0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12330,10 +12439,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12350,10 +12460,11 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12441,6 +12552,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -12496,6 +12609,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single Key Column IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -12628,10 +12743,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -12686,10 +12802,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator 
Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 9f5fa2a..faa2408 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -159,10 +159,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -183,7 +183,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -359,10 +359,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -383,7 +383,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -745,10 +745,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -769,7 +769,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1094,11 +1094,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: 
VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1122,7 +1122,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1230,11 +1230,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1258,7 +1258,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1366,11 +1366,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1394,7 +1394,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2 @@ -1502,11 +1502,11 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashLongKeySingleCountStarOperator groupByMode: HASH keyExpressions: col 5:boolean - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) @@ -1530,7 +1530,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: true vectorized: true Reducer 2
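The nativeConditionsMet / nativeConditionsNotMet strings repeated in the hunks above describe a conjunctive eligibility test: the GroupBy goes native only when hive.vectorized.execution.groupby.native.enabled is true, the execution engine is tez or spark, the operator runs in HASH mode with no grouping sets, and the aggregation shape is either a lone COUNT or a pure duplicate reduction (a GROUP BY with no aggregations at all). Below is a minimal sketch of that test; the class and method names are hypothetical, since the vectorizer code itself is not part of this excerpt.

import java.util.Arrays;
import java.util.List;

// Hypothetical sketch only -- names are illustrative, not the patch's code.
public class NativeGroupByEligibilitySketch {

  private static final List<String> NATIVE_CAPABLE_ENGINES =
      Arrays.asList("tez", "spark");

  /**
   * Returns true when every condition printed under nativeConditionsMet
   * would hold; any false input corresponds to an entry under
   * nativeConditionsNotMet and keeps the operator at native: false.
   */
  public static boolean canUseNativeGroupBy(
      boolean nativeGroupByEnabled,      // hive.vectorized.execution.groupby.native.enabled
      String executionEngine,            // hive.execution.engine
      boolean singleCountOrDupReduction, // lone COUNT aggregate, or no aggregates at all
      boolean hashMode,                  // groupByMode: HASH
      boolean hasGroupingSets) {
    // "Single Key Column" appears in some condition lists, yet multi-key
    // plans in this diff still go native via the ...MultiKey... operators,
    // so key arity is treated here as variant selection, not as a gate.
    return nativeGroupByEnabled
        && NATIVE_CAPABLE_ENGINES.contains(executionEngine)
        && singleCountOrDupReduction
        && hashMode
        && !hasGroupingSets;
  }
}

The one plan above where both "Single Key Column IS false" and "Single COUNT aggregation or Duplicate Reduction IS false" are reported stays at native: false, which is consistent with the latter clause alone being decisive (vector_cast_constant.q.out below fails on it alone and is likewise non-native).

diff --git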
ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 83d5a62..61cc2b2 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -148,6 +148,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index 1444cd8..42abfdf 100644 --- ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1264,10 +1264,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 16:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ws_order_number (type: int) @@ -1290,7 +1291,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -1317,10 +1318,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_data_types.q.out ql/src/test/results/clientpositive/spark/vector_data_types.q.out index 310a23a..ce5d591 100644 --- ql/src/test/results/clientpositive/spark/vector_data_types.q.out +++ ql/src/test/results/clientpositive/spark/vector_data_types.q.out @@ -374,6 +374,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index 
d37a27e..7aad2e1 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -87,6 +87,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -265,6 +267,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -477,6 +481,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -674,6 +680,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out index 0236980..9ea04f1 100644 --- ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out @@ -139,10 +139,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) @@ -165,7 +166,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out index a68002e..2bee2bf 100644 --- 
ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out @@ -145,6 +145,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index 168aa77..31dcd41 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -193,12 +193,12 @@ PREHOOK: query: select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1a PREHOOK: Input: default@orc_table_2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 PREHOOK: query: explain vectorization detail select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 @@ -245,10 +245,11 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -269,7 +270,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -365,12 +366,12 @@ PREHOOK: query: select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1a PREHOOK: Input: default@orc_table_2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t2.c from orc_table_2a t2 left semi join orc_table_1a t1 on t1.a = t2.c where t2.c > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1a POSTHOOK: Input: default@orc_table_2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 PREHOOK: query: CREATE TABLE orc_table_1b(v1 STRING, a INT) STORED AS ORC PREHOOK: type: CREATETABLE @@ -569,12 +570,12 @@ PREHOOK: query: select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a from orc_table_2b t2 join orc_table_1b t1 on 
t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three 3 PREHOOK: query: explain vectorization detail select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -730,12 +731,12 @@ PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_ta PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three 3 3 THREE PREHOOK: query: explain vectorization detail select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -900,12 +901,12 @@ PREHOOK: query: select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join or PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a*2, t2.c*5, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three 6 15 THREE PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -1069,12 +1070,12 @@ PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three THREE 3 PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 @@ -1238,12 +1239,12 @@ PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_2b t2 join orc_table_1b t1 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 three THREE PREHOOK: query: explain vectorization detail select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 @@ -1407,12 +1408,12 @@ PREHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH 
### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t2.v2, t2.c from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### three THREE 3 PREHOOK: query: explain vectorization detail select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 @@ -1576,10 +1577,10 @@ PREHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1b PREHOOK: Input: default@orc_table_2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.a, t1.v1, t2.v2 from orc_table_1b t1 join orc_table_2b t2 on t1.a = t2.c where t1.a > 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1b POSTHOOK: Input: default@orc_table_2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3 three THREE diff --git ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index ac7c821..2f1137b 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -131,7 +131,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index ff1af2c..a4da27f 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -89,10 +89,11 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int) @@ -113,7 +114,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: @@ -141,10 +142,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey 
(type: int) @@ -167,7 +169,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -363,10 +365,11 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 0:int, col 17:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) @@ -387,7 +390,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Local Work: @@ -415,10 +418,11 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashLongKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 1:int - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: l_partkey (type: int) @@ -441,7 +445,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index e6546c5..4feea14 100644 --- ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -146,6 +146,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out index bc9d102..df5339a 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out @@ -37,11 +37,11 @@ POSTHOOK: Lineage: orc_table_2.v2 SCRIPT [] PREHOOK: query: select * from orc_table_1 PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from orc_table_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1 
-POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL one 1 @@ -51,11 +51,11 @@ two 2 PREHOOK: query: select * from orc_table_2 PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from orc_table_2 POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 0 ZERO 2 TWO 3 THREE @@ -203,12 +203,12 @@ PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer j PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1 PREHOOK: Input: default@orc_table_2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1 POSTHOOK: Input: default@orc_table_2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL NULL NULL NULL NULL one 1 NULL NULL @@ -356,12 +356,12 @@ PREHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer PREHOOK: type: QUERY PREHOOK: Input: default@orc_table_1 PREHOOK: Input: default@orc_table_2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 right outer join orc_table_2 t2 on t1.a = t2.c POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_table_1 POSTHOOK: Input: default@orc_table_2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL 0 ZERO NULL NULL 4 FOUR NULL NULL NULL diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index 9a1fa53..7b46d00 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -89,11 +89,11 @@ POSTHOOK: Lineage: small_alltypesorc4a.ctinyint SIMPLE [] PREHOOK: query: select * from small_alltypesorc1a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL @@ -102,11 +102,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc2a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 
-7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -115,11 +115,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false @@ -128,11 +128,11 @@ NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 19 PREHOOK: query: select * from small_alltypesorc4a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_alltypesorc_a stored as orc as select * from (select * from (select * from small_alltypesorc1a) sq1 union all @@ -187,20 +187,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a PREHOOK: Output: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a POSTHOOK: Output: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -6907 253665376 NULL -64.0 -6907.0 1cGVWH7n1QU NULL NULL 1969-12-31 15:59:53.66 true NULL @@ -365,14 +365,14 @@ left outer join small_alltypesorc_a cd on cd.cint = c.cint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a c left outer join small_alltypesorc_a cd on cd.cint = c.cint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 
1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -8080 528534767 NULL -64.0 -8080.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:58.044 1969-12-31 15:59:48.655 true NULL @@ -539,14 +539,14 @@ left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select c.ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -64 -64 @@ -832,6 +832,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -919,7 +921,7 @@ left outer join small_alltypesorc_a hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*), sum(t1.c_ctinyint) from (select c.ctinyint as c_ctinyint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -929,5 +931,5 @@ left outer join small_alltypesorc_a hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 145 -8960 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index 32bcc9b..d95b11d 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -89,11 +89,11 @@ POSTHOOK: Lineage: small_alltypesorc4a.ctinyint SIMPLE [(alltypesorc)alltypesorc PREHOOK: query: select * from small_alltypesorc1a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false @@ -102,11 +102,11 @@ NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 19 PREHOOK: query: select * from small_alltypesorc2a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was 
here #### POSTHOOK: query: select * from small_alltypesorc2a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -115,11 +115,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL -13166 626923679 NULL NULL -13166.0 821UdmGbkEf4j NULL 1969-12-31 15:59:55.089 1969-12-31 16:00:15.69 true NULL NULL -14426 626923679 NULL NULL -14426.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.505 1969-12-31 16:00:13.309 true NULL NULL -14847 626923679 NULL NULL -14847.0 821UdmGbkEf4j NULL 1969-12-31 16:00:00.612 1969-12-31 15:59:43.704 true NULL @@ -128,11 +128,11 @@ NULL -15830 253665376 NULL NULL -15830.0 1cGVWH7n1QU NULL 1969-12-31 16:00:02.58 PREHOOK: query: select * from small_alltypesorc4a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -60 -200 NULL NULL -60.0 -200.0 NULL NULL 1969-12-31 16:00:11.996 1969-12-31 15:59:55.451 NULL NULL -61 -7196 NULL NULL -61.0 -7196.0 NULL 8Mlns2Tl6E0g 1969-12-31 15:59:44.823 1969-12-31 15:59:58.174 NULL false -61 -7196 NULL NULL -61.0 -7196.0 NULL fUJIN 1969-12-31 16:00:11.842 1969-12-31 15:59:58.174 NULL false @@ -192,20 +192,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a PREHOOK: Output: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a POSTHOOK: Output: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -60 -200 NULL NULL -60.0 -200.0 NULL NULL 1969-12-31 16:00:11.996 1969-12-31 15:59:55.451 NULL NULL -61 -7196 NULL NULL -61.0 -7196.0 NULL 8Mlns2Tl6E0g 1969-12-31 15:59:44.823 1969-12-31 15:59:58.174 NULL false -61 -7196 NULL NULL -61.0 -7196.0 NULL fUJIN 1969-12-31 16:00:11.842 1969-12-31 15:59:58.174 NULL false @@ -406,6 +406,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH 
native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -493,7 +495,7 @@ left outer join small_alltypesorc_a hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*), sum(t1.c_cbigint) from (select c.cbigint as c_cbigint from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -503,5 +505,5 @@ left outer join small_alltypesorc_a hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 34 -26289186744 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out index c5568b6..f8d1ec2 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out @@ -89,11 +89,11 @@ POSTHOOK: Lineage: small_alltypesorc4a.ctinyint SIMPLE [(alltypesorc)alltypesorc PREHOOK: query: select * from small_alltypesorc1a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false NULL NULL -850295959 -1887561756 NULL NULL WMIgGA73 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:00.348 false false NULL NULL -886426182 -1887561756 NULL NULL 0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 16:00:04.472 true false @@ -102,11 +102,11 @@ NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 19 PREHOOK: query: select * from small_alltypesorc2a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -51 NULL NULL -1731061911 -51.0 NULL Pw53BBJ yL443x2437PO5Hv1U3lCjq2D 1969-12-31 16:00:08.451 NULL true false -51 NULL NULL -1846191223 -51.0 NULL Ul085f84S33Xd32u x1JC58g0Ukp 1969-12-31 16:00:08.451 NULL true true -51 NULL NULL -1874052220 -51.0 NULL c61B47I604gymFJ sjWQS78 1969-12-31 16:00:08.451 NULL false false @@ -115,11 +115,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -51 NULL -31312632 1086455747 -51.0 NULL NULL Bc7xt12568c451o64LF5 1969-12-31 16:00:08.451 NULL NULL true -51 NULL -337975743 608681041 -51.0 NULL NULL Ih2r28o6 
1969-12-31 16:00:08.451 NULL NULL true -51 NULL -413196097 -306198070 -51.0 NULL NULL F53QcSDPpxYF1Ub 1969-12-31 16:00:08.451 NULL NULL false @@ -128,11 +128,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc4a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc4a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -7196 NULL -1615920595 -64.0 -7196.0 NULL X5rDjl 1969-12-31 16:00:11.912 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -1639157869 -64.0 -7196.0 NULL IJ0Oj7qAiqNGsN7gn 1969-12-31 16:00:01.785 1969-12-31 15:59:58.174 NULL false -64 -7196 NULL -527203677 -64.0 -7196.0 NULL JBE4H5RoK412Cs260I72 1969-12-31 15:59:50.184 1969-12-31 15:59:58.174 NULL true @@ -192,20 +192,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a PREHOOK: Output: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a POSTHOOK: Output: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_a POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -51 NULL -31312632 1086455747 -51.0 NULL NULL Bc7xt12568c451o64LF5 1969-12-31 16:00:08.451 NULL NULL true -51 NULL -337975743 608681041 -51.0 NULL NULL Ih2r28o6 1969-12-31 16:00:08.451 NULL NULL true -51 NULL -413196097 -306198070 -51.0 NULL NULL F53QcSDPpxYF1Ub 1969-12-31 16:00:08.451 NULL NULL false @@ -254,7 +254,7 @@ left outer join small_alltypesorc_a hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -264,7 +264,7 @@ left outer join small_alltypesorc_a hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 20 PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 @@ -294,7 +294,7 @@ left outer join small_alltypesorc_a hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -304,7 +304,7 @@ left outer join small_alltypesorc_a hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 28 PREHOOK: query: explain vectorization detail formatted select count(*) from (select c.cstring1 @@ -334,7 +334,7 @@ left outer join 
small_alltypesorc_a hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -344,5 +344,5 @@ left outer join small_alltypesorc_a hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 28 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out index 9872ab1..a55250b 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out @@ -89,11 +89,11 @@ POSTHOOK: Lineage: small_alltypesorc4b.ctinyint SIMPLE [] PREHOOK: query: select * from small_alltypesorc1b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc1b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc1b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc1b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -3097 253665376 NULL -64.0 -3097.0 1cGVWH7n1QU NULL 1969-12-31 16:00:00.013 1969-12-31 16:00:06.097 true NULL @@ -107,11 +107,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc2b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc2b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc2b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc2b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -200 NULL -1809444706 -64.0 -200.0 NULL B87YVb3UASqg 1969-12-31 16:00:10.858 1969-12-31 15:59:55.451 NULL true -64 -200 NULL 2118653994 -64.0 -200.0 NULL ONHGSDy1U4Ft431DfQp15 1969-12-31 16:00:03.944 1969-12-31 15:59:55.451 NULL true -64 -200 NULL 927647669 -64.0 -200.0 NULL DhxkBT 1969-12-31 16:00:00.199 1969-12-31 15:59:55.451 NULL false @@ -125,11 +125,11 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### PREHOOK: query: select * from small_alltypesorc3b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc3b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc3b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc3b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### NULL NULL -1015272448 -1887561756 NULL NULL jTQ68531mP 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:45.854 false false NULL NULL -609074876 -1887561756 NULL NULL EcM71 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:55.061 true false NULL NULL -700300206 -1887561756 NULL NULL kdqQE010 4hA4KQj2vD3fI6gX82220d NULL 1969-12-31 15:59:58.384 false false @@ -143,11 +143,11 @@ NULL NULL -971543377 -1645852809 NULL NULL uN803aW xH7445Rals48VOulSyR5F NULL 19 PREHOOK: query: select * from small_alltypesorc4b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc4b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here 
#### POSTHOOK: query: select * from small_alltypesorc4b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc4b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_alltypesorc_b stored as orc as select * from (select * from (select * from small_alltypesorc1b) sq1 union all @@ -202,20 +202,20 @@ PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b PREHOOK: Output: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b POSTHOOK: Output: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_b PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_b POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -15920 528534767 NULL -64.0 -15920.0 cvLH6Eat2yFsyy7p NULL 1969-12-31 15:59:51.859 1969-12-31 16:00:14.468 true NULL -64 -200 NULL -1809444706 -64.0 -200.0 NULL B87YVb3UASqg 1969-12-31 16:00:10.858 1969-12-31 15:59:55.451 NULL true @@ -265,14 +265,14 @@ left outer join small_alltypesorc_b cd on cd.cint = c.cint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd on cd.cint = c.cint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -3586 626923679 NULL -64.0 -3586.0 821UdmGbkEf4j NULL 1969-12-31 16:00:11.952 1969-12-31 15:59:51.131 true NULL -64 -10462 626923679 NULL -64.0 -10462.0 821UdmGbkEf4j NULL 1969-12-31 16:00:02.496 1969-12-31 16:00:00.164 true NULL -64 -4018 626923679 NULL -64.0 -4018.0 821UdmGbkEf4j NULL 1969-12-31 15:59:58.959 1969-12-31 16:00:07.803 true NULL @@ -346,14 +346,14 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### -64 -64 -64 @@ -792,7 +792,7 @@ left outer join small_alltypesorc_b hd ) t1 PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b -PREHOOK: Output: hdfs://### HDFS PATH ### 
+#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -802,5 +802,5 @@ left outer join small_alltypesorc_b hd ) t1 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 890 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out index baf7204..680ee42 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out @@ -28,12 +28,12 @@ PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@sorted_mod_4 PREHOOK: Output: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@sorted_mod_4 POSTHOOK: Output: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_table stored as orc as select ctinyint, cbigint from alltypesorc limit 100 PREHOOK: type: CREATETABLE_AS_SELECT @@ -60,12 +60,12 @@ PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Output: default@small_table -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Output: default@small_table -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from sorted_mod_4 s @@ -89,7 +89,7 @@ on s.ctinyint = st.ctinyint PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, st.* from sorted_mod_4 s left outer join small_table st @@ -98,7 +98,7 @@ on s.ctinyint = st.ctinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6876 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint @@ -123,7 +123,7 @@ on s.ctinyint = sm.ctinyint and s.cmodint = 2 PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -132,7 +132,7 @@ on s.ctinyint = sm.ctinyint and s.cmodint = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6058 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint @@ -157,7 +157,7 @@ on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint PREHOOK: type: QUERY PREHOOK: Input: default@small_table 
PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -166,7 +166,7 @@ on s.ctinyint = sm.ctinyint and pmod(s.ctinyint, 4) = s.cmodint POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6248 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint @@ -191,7 +191,7 @@ on s.ctinyint = sm.ctinyint and s.ctinyint < 100 PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.ctinyint, s.cmodint, sm.cbigint from sorted_mod_4 s left outer join small_table sm @@ -200,7 +200,7 @@ on s.ctinyint = sm.ctinyint and s.ctinyint < 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6876 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* @@ -231,7 +231,7 @@ left outer join sorted_mod_4 s2 PREHOOK: type: QUERY PREHOOK: Input: default@small_table PREHOOK: Input: default@sorted_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* from sorted_mod_4 s left outer join small_table sm @@ -242,7 +242,7 @@ left outer join sorted_mod_4 s2 POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table POSTHOOK: Input: default@sorted_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 3268334 PREHOOK: query: create table mod_8_mod_4 stored as orc as select pmod(ctinyint, 8) as cmodtinyint, pmod(cint, 4) as cmodint from alltypesorc @@ -272,12 +272,12 @@ PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Output: default@mod_8_mod_4 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Output: default@mod_8_mod_4 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: create table small_table2 stored as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100 PREHOOK: type: CREATETABLE_AS_SELECT @@ -304,12 +304,12 @@ PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table2 PREHOOK: Output: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 POSTHOOK: Output: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* from mod_8_mod_4 s @@ -333,7 +333,7 @@ on s.cmodtinyint = st.cmodtinyint PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: 
default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, st.* from mod_8_mod_4 s left outer join small_table2 st @@ -342,7 +342,7 @@ on s.cmodtinyint = st.cmodtinyint POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 39112 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint @@ -367,7 +367,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -376,7 +376,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodint = 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 11171 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint @@ -401,7 +401,7 @@ on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -410,7 +410,7 @@ on s.cmodtinyint = sm.cmodtinyint and pmod(s.cmodtinyint, 4) = s.cmodint POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 14371 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint @@ -435,7 +435,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.cmodtinyint, s.cmodint, sm.cbigint from mod_8_mod_4 s left outer join small_table2 sm @@ -444,7 +444,7 @@ on s.cmodtinyint = sm.cmodtinyint and s.cmodtinyint < 3 POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 17792 PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, sm.*, s2.* @@ -475,7 +475,7 @@ left outer join mod_8_mod_4 s2 PREHOOK: type: QUERY PREHOOK: Input: default@mod_8_mod_4 PREHOOK: Input: default@small_table2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: select count(*) from (select s.*, sm.*, s2.* from mod_8_mod_4 s left outer join small_table2 sm @@ -486,5 +486,5 @@ left outer join mod_8_mod_4 s2 POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 POSTHOOK: Input: default@small_table2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 6524438 diff --git ql/src/test/results/clientpositive/spark/vector_string_concat.q.out 
ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index 30dbaf1..bb6a956 100644 --- ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -352,10 +352,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: - className: VectorGroupByOperator + className: VectorGroupByHashStringKeyDuplicateReductionOperator groupByMode: HASH keyExpressions: col 20:string - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single Key Column IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) @@ -379,7 +380,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index c3201bf..c18a1ea 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -53,6 +53,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -232,6 +234,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -559,6 +563,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -738,6 +744,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1065,6 +1073,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + 
nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -1244,6 +1254,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1617,6 +1629,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_1.q.out ql/src/test/results/clientpositive/spark/vectorization_1.q.out index 71625e0..d14e70a 100644 --- ql/src/test/results/clientpositive/spark/vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_1.q.out @@ -86,6 +86,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_12.q.out ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 24cfa4e..a476f53 100644 --- ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -110,6 +110,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index c2a8006..ea64131 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 
(type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -464,6 +466,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 95bf29b..ed0b888 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -112,6 +112,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index d0b03b3..eed3282 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -108,6 +108,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index 8798ebe..f038c47 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_2.q.out 
ql/src/test/results/clientpositive/spark/vectorization_2.q.out index 99afc2b..12932ad 100644 --- ql/src/test/results/clientpositive/spark/vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_2.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_3.q.out ql/src/test/results/clientpositive/spark/vectorization_3.q.out index 2bccf64..fd9c7d5 100644 --- ql/src/test/results/clientpositive/spark/vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_3.q.out @@ -95,6 +95,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_4.q.out ql/src/test/results/clientpositive/spark/vectorization_4.q.out index 922eb90..309fd42 100644 --- ql/src/test/results/clientpositive/spark/vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_4.q.out @@ -90,6 +90,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_5.q.out ql/src/test/results/clientpositive/spark/vectorization_5.q.out index 4cf4548..7ef72c6 100644 --- ql/src/test/results/clientpositive/spark/vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_5.q.out @@ -83,6 +83,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index 8798ebe..f038c47 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -85,6 +85,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No 
Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out index c46fc03..c8d0e88 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -43,6 +43,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out index d58a989..8061f9b 100644 --- ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out @@ -232,7 +232,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -321,7 +321,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -520,7 +520,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -605,7 +605,7 @@ STAGE PLANS: enabled: true inputFormatFeatureSupport: [] featureSupportInUse: [] - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 6215906..6402e3d 100644 --- ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -117,6 +117,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash @@ -377,6 +379,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false 
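The hunks above and below all add the same pair of annotations: every native-GroupBy precondition is printed under either nativeConditionsMet or nativeConditionsNotMet, and the operator stays native: false as soon as any one condition fails. A minimal sketch of that bookkeeping follows — all class and method names are invented for illustration; this is not the patch's actual Vectorizer code:

import java.util.ArrayList;
import java.util.List;

public class NativeGroupByConditions {

  // Evaluate each precondition, route its description into met/notMet,
  // and report native only when nothing landed in notMet.
  public static boolean evaluate(boolean nativeEnabled, String engine,
      boolean singleCountOrDupReduction, boolean hashMode, boolean noGroupingSets,
      List<String> met, List<String> notMet) {
    add(met, notMet, nativeEnabled,
        "hive.vectorized.execution.groupby.native.enabled IS " + nativeEnabled);
    boolean engineOk = engine.equals("tez") || engine.equals("spark");
    add(met, notMet, engineOk,
        "hive.execution.engine " + engine + " IN [tez, spark] IS " + engineOk);
    add(met, notMet, singleCountOrDupReduction,
        "Single COUNT aggregation or Duplicate Reduction IS " + singleCountOrDupReduction);
    add(met, notMet, hashMode, "Group By Mode HASH IS " + hashMode);
    add(met, notMet, noGroupingSets, "No Grouping Sets IS " + noGroupingSets);
    return notMet.isEmpty();
  }

  private static void add(List<String> met, List<String> notMet,
      boolean ok, String description) {
    (ok ? met : notMet).add(description);
  }

  public static void main(String[] args) {
    List<String> met = new ArrayList<>();
    List<String> notMet = new ArrayList<>();
    // Mirrors the vectorization_short_regress case: Spark engine, HASH mode,
    // no grouping sets, but multiple aggregations -> single-COUNT test fails.
    boolean isNative = evaluate(true, "spark", false, true, true, met, notMet);
    System.out.println("native: " + isNative);           // native: false
    System.out.println("nativeConditionsMet: " + met);
    System.out.println("nativeConditionsNotMet: " + notMet);
  }
}

Partitioning the descriptions this way makes the EXPLAIN output self-diagnosing: the one failing condition is named explicitly in the plan rather than left for the reader to infer.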
vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] mode: hash @@ -629,6 +633,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash @@ -860,6 +866,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash @@ -2185,6 +2193,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:smallint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: _col0 (type: smallint) @@ -2460,6 +2470,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 5:double native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: _col0 (type: double) @@ -2779,6 +2791,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 8:timestamp, col 6:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: timestamp), _col1 (type: string) @@ -3179,6 +3193,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: boolean) @@ -3411,10 +3427,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, 
Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3435,7 +3451,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3524,10 +3540,10 @@ STAGE PLANS: Group By Operator aggregations: count(i) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3548,7 +3564,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3709,10 +3725,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountStarOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3733,7 +3749,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3822,10 +3838,10 @@ STAGE PLANS: Group By Operator aggregations: count(ctinyint) Group By Vectorization: - aggregators: VectorUDAFCount(col 0:tinyint) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -3846,7 +3862,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -3935,10 +3951,10 @@ STAGE PLANS: Group By Operator aggregations: count(cint) Group By Vectorization: - aggregators: VectorUDAFCount(col 2:int) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH 
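Where a plan does qualify, the generic "aggregators: VectorUDAFCount(...)" line disappears and the class switches to a specialization such as VectorGroupByHashMultiKeySingleCountColumnOperator. The intuition, as a hedged sketch (invented names; the real operator processes whole vectorized row batches, not one row at a time): with only COUNT(col) to maintain, each hash-table entry can be a bare long counter instead of a generic aggregation buffer.

import java.util.HashMap;
import java.util.Map;

public class SingleCountColumnSketch {

  // key -> running count of non-null values in the counted column
  private final Map<Object, long[]> counts = new HashMap<>();

  public void accumulate(Object key, Object countedValue) {
    long[] slot = counts.computeIfAbsent(key, k -> new long[1]);
    if (countedValue != null) {  // COUNT(col) skips NULLs; COUNT(*) would not
      slot[0]++;
    }
  }

  public void emit() {
    counts.forEach((key, slot) -> System.out.println(key + "\t" + slot[0]));
  }

  public static void main(String[] args) {
    SingleCountColumnSketch agg = new SingleCountColumnSketch();
    agg.accumulate("a", 1);
    agg.accumulate("a", null);   // NULL is not counted
    agg.accumulate("b", 7);
    agg.emit();                  // a -> 1, b -> 1
  }
}

The COUNT(*) variant seen in the same hunks (VectorGroupByHashMultiKeySingleCountStarOperator) differs only in dropping the null check, which is presumably why both collapse to the same "Single COUNT aggregation" condition above.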
projectedOutputColumnNums: [0] mode: hash @@ -3959,7 +3975,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4048,10 +4064,10 @@ STAGE PLANS: Group By Operator aggregations: count(cfloat) Group By Vectorization: - aggregators: VectorUDAFCount(col 4:float) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4072,7 +4088,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4161,10 +4177,10 @@ STAGE PLANS: Group By Operator aggregations: count(cstring1) Group By Vectorization: - aggregators: VectorUDAFCount(col 6:string) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4185,7 +4201,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 @@ -4274,10 +4290,10 @@ STAGE PLANS: Group By Operator aggregations: count(cboolean1) Group By Vectorization: - aggregators: VectorUDAFCount(col 10:boolean) -> bigint - className: VectorGroupByOperator + className: VectorGroupByHashMultiKeySingleCountColumnOperator groupByMode: HASH - native: false + native: true + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -4298,7 +4314,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 58e295d..3baaeaa 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -303,6 +303,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate 
Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -445,6 +447,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 5104c80..a248d32 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -120,6 +120,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index edc8f74..abbc6fb 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -3494,6 +3494,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:string, col 3:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: p_mfgr (type: string), p_brand (type: string) diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index 68b89a7..03da68b 100644 --- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -797,6 +797,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -922,6 +924,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -1065,6 +1069,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Group By 
Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/vector_aggregate_9.q.out ql/src/test/results/clientpositive/vector_aggregate_9.q.out index 0f7fcc1..7ebfab9 100644 --- ql/src/test/results/clientpositive/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_9.q.out @@ -140,6 +140,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -246,6 +248,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash @@ -352,6 +356,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] mode: hash diff --git ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out index 4d2b0dc..2e41e17 100644 --- ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/vector_aggregate_without_gby.q.out @@ -79,6 +79,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index 3f9e90b..0f3cba4 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -193,6 +193,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -343,6 +345,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 10:binary native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or 
Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bin (type: binary) diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out index 3d3d761..0de448d 100644 --- ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -143,6 +143,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out index b38cbe7..4b58c6f 100644 --- ql/src/test/results/clientpositive/vector_char_2.q.out +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -104,6 +104,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) @@ -292,6 +294,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:char(20) native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: char(20)) diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out index 48d38c3..095c74d 100644 --- ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -72,6 +72,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) @@ -267,6 +269,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/vector_data_types.q.out ql/src/test/results/clientpositive/vector_data_types.q.out index 688e6a6..aa01319 100644 --- 
ql/src/test/results/clientpositive/vector_data_types.q.out +++ ql/src/test/results/clientpositive/vector_data_types.q.out @@ -345,6 +345,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index 16c80f0..7c57b92 100644 --- ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -82,6 +82,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -224,6 +226,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) @@ -399,6 +403,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) @@ -560,6 +566,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 3:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: _col0 (type: int) diff --git ql/src/test/results/clientpositive/vector_decimal_precision.q.out ql/src/test/results/clientpositive/vector_decimal_precision.q.out index fd6d9c3..ddb9dfe 100644 --- ql/src/test/results/clientpositive/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/vector_decimal_precision.q.out @@ -586,6 +586,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -1171,6 +1173,8 @@ STAGE PLANS: className: 
VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_distinct_2.q.out ql/src/test/results/clientpositive/vector_distinct_2.q.out index c3d2d89..0b31564 100644 --- ql/src/test/results/clientpositive/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/vector_distinct_2.q.out @@ -138,6 +138,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out index 6b2c7fe..65993da 100644 --- ql/src/test/results/clientpositive/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -43,6 +43,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -186,6 +188,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -337,6 +341,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) @@ -488,6 +494,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: cint (type: int) diff --git ql/src/test/results/clientpositive/vector_groupby_3.q.out ql/src/test/results/clientpositive/vector_groupby_3.q.out index dfac04d..da70a98 100644 --- ql/src/test/results/clientpositive/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/vector_groupby_3.q.out @@ -140,6 +140,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 
0:tinyint, col 8:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: t (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index 01c5096..98043e7 100644 --- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -53,6 +53,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash @@ -336,6 +338,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 0:string native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: key (type: string) diff --git ql/src/test/results/clientpositive/vector_groupby_multikey.q.out ql/src/test/results/clientpositive/vector_groupby_multikey.q.out new file mode 100644 index 0000000..5cb7b2a --- /dev/null +++ ql/src/test/results/clientpositive/vector_groupby_multikey.q.out @@ -0,0 +1,2360 @@ +PREHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a.txt' OVERWRITE INTO TABLE groupby_multi_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: CREATE TABLE groupby_multi_1a STORED AS ORC AS SELECT * FROM groupby_multi_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SIMPLE [(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a.key1 SIMPLE 
[(groupby_multi_1a_txt)groupby_multi_1a_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 EXPRESSION [] +POSTHOOK: Lineage: groupby_multi_1a.key1 EXPRESSION [] +PREHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2207-09-16', -13) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-20', 18) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull_txt(key0 date, key1 tinyint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_multi_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_multi_1a_nonull.txt' OVERWRITE INTO TABLE groupby_multi_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_multi_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_multi_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_multi_1a_nonull STORED AS ORC AS SELECT * FROM groupby_multi_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_multi_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_multi_1a_nonull +POSTHOOK: Lineage: groupby_multi_1a_nonull.key0 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key0, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_multi_1a_nonull.key1 SIMPLE [(groupby_multi_1a_nonull_txt)groupby_multi_1a_nonull_txt.FieldSchema(name:key1, type:tinyint, comment:null), ] +PREHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +PREHOOK: type: QUERY 
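The other half of the "Single COUNT aggregation or Duplicate Reduction" condition is the no-aggregation case this new multikey test exercises: a GROUP BY that computes nothing per group only has to forward each distinct key combination once. A rough illustration under that assumption (invented names; the real duplicate-reduction operator works on multi-column vector batches and flushes under the memory settings configured in HiveConf):

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class DuplicateReductionSketch {
  public static void main(String[] args) {
    // (key0, key1) pairs in input order, including a NULL key group.
    List<List<Object>> rows = Arrays.asList(
        Arrays.asList("2207-09-16", (byte) -13),
        Arrays.asList("2207-09-16", (byte) -13),  // duplicate, dropped
        Arrays.asList("2018-04-20", (byte) 18),
        Arrays.asList(null, null));               // NULL keys form a group too

    Set<List<Object>> seen = new HashSet<>();
    for (List<Object> key : rows) {
      if (seen.add(key)) {           // true only the first time a key appears
        System.out.println(key);     // forward each distinct key exactly once
      }
    }
  }
}

A set of seen keys is all the state required, which is what lets "Duplicate Reduction" share the native fast path with single-COUNT aggregation in the plans that follow.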
+PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2111-10-04', -81) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_multi_1a +POSTHOOK: query: insert into groupby_multi_1a values (date '2018-04-21', 19) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_multi_1a +POSTHOOK: Lineage: groupby_multi_1a.key0 SCRIPT [] +POSTHOOK: Lineage: groupby_multi_1a.key1 SCRIPT [] +PREHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +NULL NULL 2 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2018-04-20 18 1 +2018-04-21 19 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 2 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 2 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: 
explain vectorization operator +select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_multi_1a + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 61 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 1707 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +NULL NULL +PREHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2018-04-20 18 +2018-04-21 19 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 
51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1805-12-21 16 3 +1809-10-10 -28 1 +1820-12-15 51 1 +1833-09-17 16 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1859-01-20 16 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2006-12-15 16 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2151-11-20 16 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2207-09-16 NULL 2 +2249-12-20 51 1 +2251-08-16 -94 1 +2251-08-16 NULL 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +NULL -126 1 +PREHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1, count(*) from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 1 +1809-10-10 -28 1 +1820-12-15 51 1 +1845-11-11 -126 1 +1858-09-10 22 1 +1869-03-17 -126 1 +1879-03-14 51 1 +1892-05-06 -103 1 +1892-05-06 -121 1 +1892-05-06 61 1 +1937-09-06 -126 1 +1950-10-06 -39 1 +1960-04-02 -75 1 +1971-06-16 24 1 +1988-01-10 22 1 +2025-05-17 51 1 +2029-11-21 -75 1 +2059-05-11 -39 2 +2064-09-04 -126 1 +2083-03-10 51 1 +2086-09-20 -69 1 +2088-05-07 -15 1 +2111-10-04 -81 1 +2185-07-27 51 1 +2194-06-19 -126 1 +2196-04-12 22 1 +2204-06-14 22 1 +2207-04-24 -92 1 +2207-04-24 0 1 +2207-09-16 -105 1 +2207-09-16 -13 1 +2207-09-16 116 1 +2207-09-16 122 1 +2207-09-16 124 1 +2207-09-16 15 1 +2249-12-20 51 1 +2251-08-16 -94 1 +2268-07-27 -117 1 +2268-07-27 -12 2 +2268-07-27 114 1 +2268-07-27 118 1 +2268-07-27 43 1 +PREHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_multi_1a_nonull + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column 
stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key0 (type: date), key1 (type: tinyint) + outputColumnNames: key0, key1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key0 (type: date), key1 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: date), _col1 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 55 Data size: 3232 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: date), _col1 (type: tinyint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), KEY.reducesinkkey1 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1805-12-21 16 +1809-10-10 -28 +1820-12-15 51 +1833-09-17 16 +1845-11-11 -126 +1858-09-10 22 +1859-01-20 16 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2006-12-15 16 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2151-11-20 16 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2207-09-16 NULL +2249-12-20 51 +2251-08-16 -94 +2251-08-16 NULL +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +NULL -126 +PREHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key0, key1 from groupby_multi_1a_nonull where key0 != '2006-12-15' and key1 != 16 group by key0, key1 order by key0, key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_multi_1a_nonull +#### A masked pattern was here #### +1804-02-16 -39 +1809-10-10 -28 +1820-12-15 51 +1845-11-11 -126 +1858-09-10 22 +1869-03-17 -126 +1879-03-14 51 +1892-05-06 -103 +1892-05-06 -121 +1892-05-06 61 +1937-09-06 -126 +1950-10-06 -39 +1960-04-02 -75 +1971-06-16 24 +1988-01-10 22 +2025-05-17 51 +2029-11-21 -75 +2059-05-11 -39 +2064-09-04 -126 +2083-03-10 51 +2086-09-20 -69 +2088-05-07 -15 +2111-10-04 -81 +2185-07-27 51 +2194-06-19 -126 +2196-04-12 22 +2204-06-14 22 +2207-04-24 -92 +2207-04-24 0 +2207-09-16 -105 +2207-09-16 -13 +2207-09-16 116 +2207-09-16 122 +2207-09-16 124 +2207-09-16 15 +2249-12-20 51 +2251-08-16 -94 +2268-07-27 -117 +2268-07-27 -12 +2268-07-27 114 +2268-07-27 118 +2268-07-27 43 +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string), ts (type: timestamp) + outputColumnNames: bo, s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(ts) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select s, bo, 
count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: bo (type: boolean), s (type: string) + outputColumnNames: bo, s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string), bo (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true 
+ Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: boolean) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: boolean), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, bo, count(*) from over10k group by s, bo order by s, bo limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen false 4 +alice allen true 4 +alice brown false 8 +alice brown true 6 +alice carson false 3 +alice carson true 7 +alice davidson false 10 +alice davidson true 8 +alice ellison false 9 +alice ellison true 6 +PREHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), d (type: double), ts (type: timestamp) + outputColumnNames: si, d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 
1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(d) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), ts (type: timestamp) + outputColumnNames: si, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp), si (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
timestamp), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, si, count(*) from over10k group by ts, si order by ts, si limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 269 1 +2013-03-01 09:11:58.70307 280 2 +2013-03-01 09:11:58.70307 282 1 +2013-03-01 09:11:58.70307 299 1 +2013-03-01 09:11:58.70307 300 1 +2013-03-01 09:11:58.70307 333 1 +2013-03-01 09:11:58.70307 347 1 +2013-03-01 09:11:58.70307 356 1 +2013-03-01 09:11:58.70307 361 1 +2013-03-01 09:11:58.70307 374 1 +PREHOOK: query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: f (type: float), dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: f, dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(f) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(f) from over10k group by `dec`, bin order by `dec`, bin limit 
10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)), bin (type: binary) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(4,2)), _col1 (type: binary) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)), KEY._col1 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 
1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)), _col1 (type: binary) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), KEY.reducesinkkey1 (type: binary), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, bin, count(*) from over10k group by `dec`, bin order by `dec`, bin limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 american history 1 +0.01 values clariffication 1 +0.02 chemistry 1 +0.03 biology 1 +0.03 debate 1 +0.04 history 1 +0.05 education 1 +0.06 forestry 1 +0.06 linguistics 1 +0.06 values clariffication 1 +PREHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 
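The plan that follows compiles GROUP BY i, b with ORDER BY ... LIMIT into two MapReduce jobs: Stage-1 performs the map-side hash GROUP BY and the mergepartial reduce, and Stage-2 re-sorts the aggregated rows for the ORDER BY and LIMIT before the Stage-0 fetch. As a minimal, hedged sketch of a session that would reproduce these plans (the setting names are taken from the enabledConditionsMet and nativeConditionsMet lines printed in this file; exact defaults may differ by Hive version):

    -- Settings the printed conditions refer to; values shown are illustrative.
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.groupby.native.enabled=true;
    -- On the MR engine, "hive.execution.engine mr IN [tez, spark] IS false"
    -- keeps the VectorGroupByOperator on its non-native HASH path.
    EXPLAIN VECTORIZATION OPERATOR
    SELECT i, b, count(si) FROM over10k GROUP BY i, b ORDER BY i, b LIMIT 10;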
+ +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint) + outputColumnNames: si, i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(si) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] 
IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(si) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int), b 
(type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 10175440 Basic 
stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b, count(*) from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 1 +65536 4294967307 1 +65536 4294967308 1 +65536 4294967312 1 +65536 4294967317 1 +65536 4294967320 1 +65536 4294967326 1 +65536 4294967334 1 +65536 4294967336 1 +65536 4294967338 1 +PREHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int), b (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + 
vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: bigint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, b from over10k group by i, b order by i, b limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 4294967299 +65536 4294967307 +65536 4294967308 +65536 4294967312 +65536 4294967317 +65536 4294967320 +65536 4294967326 +65536 4294967334 +65536 4294967336 +65536 4294967338 diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out 
ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index 8a6135e..7cc60a5 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -266,6 +266,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -458,6 +460,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) @@ -734,6 +738,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_item_sk (type: int) @@ -932,6 +938,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 9:int, col 2:int native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) diff --git ql/src/test/results/clientpositive/vector_groupby_singlekey.q.out ql/src/test/results/clientpositive/vector_groupby_singlekey.q.out new file mode 100644 index 0000000..f8fe62f --- /dev/null +++ ql/src/test/results/clientpositive/vector_groupby_singlekey.q.out @@ -0,0 +1,11238 @@ +PREHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a.txt' OVERWRITE INTO TABLE groupby_long_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_txt +PREHOOK: query: CREATE TABLE groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: CREATE TABLE 
groupby_long_1a STORED AS ORC AS SELECT * FROM groupby_long_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SIMPLE [(groupby_long_1a_txt)groupby_long_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (-5206670856103795573) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a +POSTHOOK: query: insert into groupby_long_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a +POSTHOOK: Lineage: groupby_long_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1a_nonull.txt' OVERWRITE INTO TABLE groupby_long_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1a_nonull STORED AS ORC AS SELECT * FROM groupby_long_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SIMPLE [(groupby_long_1a_nonull_txt)groupby_long_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: insert into groupby_long_1a_nonull values (-6187919478609154811) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values 
(-6187919478609154811) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1a_nonull values (1000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: query: insert into groupby_long_1a_nonull values (1000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1a_nonull +POSTHOOK: Lineage: groupby_long_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b.txt' OVERWRITE INTO TABLE groupby_long_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_txt +PREHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: CREATE TABLE groupby_long_1b STORED AS ORC AS SELECT * FROM groupby_long_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SIMPLE [(groupby_long_1b_txt)groupby_long_1b_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1b values (32030) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (32030) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b +POSTHOOK: query: insert into groupby_long_1b values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b +POSTHOOK: Lineage: groupby_long_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: 
Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull_txt(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1b_nonull.txt' OVERWRITE INTO TABLE groupby_long_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1b_nonull STORED AS ORC AS SELECT * FROM groupby_long_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SIMPLE [(groupby_long_1b_nonull_txt)groupby_long_1b_nonull_txt.FieldSchema(name:key, type:smallint, comment:null), ] +PREHOOK: query: insert into groupby_long_1b_nonull values (31713) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (31713) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1b_nonull values (34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: query: insert into groupby_long_1b_nonull values (34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1b_nonull +POSTHOOK: Lineage: groupby_long_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c.txt' OVERWRITE INTO TABLE groupby_long_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_txt +PREHOOK: query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: 
query: CREATE TABLE groupby_long_1c STORED AS ORC AS SELECT * FROM groupby_long_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c.key SIMPLE [(groupby_long_1c_txt)groupby_long_1c_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'TKTKGVGFW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (NULL, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull_txt(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_long_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_long_1c_nonull.txt' OVERWRITE INTO TABLE groupby_long_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_long_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_long_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_long_1c_nonull STORED AS ORC AS SELECT * FROM groupby_long_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_long_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_long_1c_nonull +POSTHOOK: Lineage: 
groupby_long_1c_nonull.b_string SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:b_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_long_1c_nonull.key SIMPLE [(groupby_long_1c_nonull_txt)groupby_long_1c_nonull_txt.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (1928928239, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string EXPRESSION [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_long_1c +POSTHOOK: query: insert into groupby_long_1c values (9999, 'NEW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_long_1c +POSTHOOK: Lineage: groupby_long_1c.b_string SCRIPT [] +POSTHOOK: Lineage: groupby_long_1c.key SCRIPT [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +-8460550397108077433 1 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a where key != -8460550397108077433 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 2 +-5310365297525168078 1 +-6187919478609154811 4 +1569543799237464101 1 +3313583664488247651 1 +800 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + 
TableScan + alias: groupby_long_1a + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +NULL +PREHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a where key != -8460550397108077433 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +1569543799237464101 +3313583664488247651 +800 +968819023021777205 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +1569543799237464101 1 +3313583664488247651 1 +968819023021777205 1 
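The result pairs above pin down the NULL semantics these tests exercise: count(key) skips rows whose key is NULL, while count(*) counts every row in the group, which is why the groupby_long_1a results earlier report NULL 0 for count(key) but NULL 2 for count(*) (that table holds two NULL keys: one loaded from the text file and one inserted). A minimal HiveQL illustration against the tables created in this file:

    -- count(col) ignores NULLs; count(*) counts rows, including the NULL-key group.
    SELECT key, count(key) FROM groupby_long_1a GROUP BY key;  -- NULL group -> 0
    SELECT key, count(*)   FROM groupby_long_1a GROUP BY key;  -- NULL group -> 2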
+PREHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1a_nonull where key != 1569543799237464101 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 1 +-5310365297525168078 1 +-6187919478609154811 5 +-8460550397108077433 1 +1000 1 +3313583664488247651 1 +968819023021777205 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1a_nonull + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: bigint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +1569543799237464101 +3313583664488247651 +968819023021777205 +PREHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1a_nonull where key != 1569543799237464101 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 +-5310365297525168078 +-6187919478609154811 +-8460550397108077433 +1000 +3313583664488247651 +968819023021777205 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + 
+STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b where key != 
32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1b group by 
key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +32030 2 +800 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 1 +31713 10 +800 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 28 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +NULL +PREHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b +#### A masked pattern was here #### +-25394 +31713 +32030 +800 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(key) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, 
count(key) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +32030 1 +34 1 +PREHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1b_nonull where key != 32030 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 1 +31713 11 +34 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1b_nonull + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: smallint) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: smallint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: 
+ enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1b_nonull where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1b_nonull +#### A masked pattern was here #### +-25394 +31713 +32030 +34 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_long_1c where key != 
-1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 5 +9999 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 5 +9999 1 +PREHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: key, b_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b_string) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(b_string) from groupby_long_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +9999 1 +NULL 3 +PREHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +9999 1 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_long_1c + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys 
IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 16 Data size: 1203 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +NULL +PREHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@groupby_long_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c where key != -32030 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +9999 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 5 +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 4 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 4 +1725068083 1 +1928928239 2 +PREHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(b_string) from groupby_long_1c_nonull where key != -1437463633 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 1 +1928928239 2 +PREHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: 
groupby_long_1c_nonull + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 10 Data size: 670 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 335 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_long_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +-1437463633 +1725068083 +1928928239 +PREHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_long_1c_nonull where key != -1437463633 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_long_1c_nonull +#### A masked pattern was here #### +1725068083 +1928928239 +PREHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a.txt' OVERWRITE INTO TABLE groupby_decimal64_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a +PREHOOK: query: insert into groupby_decimal64_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1a values (333.33) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (333.33) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a values (800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: 
default@groupby_decimal64_1a +POSTHOOK: query: insert into groupby_decimal64_1a values (800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a +POSTHOOK: Lineage: groupby_decimal64_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1a_nonull(key decimal(6,3)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1a_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1a_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (-76.2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: query: insert into groupby_decimal64_1a_nonull values (100) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1a_nonull +POSTHOOK: Lineage: groupby_decimal64_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b.txt' OVERWRITE INTO TABLE groupby_decimal64_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b +PREHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp 
EXPRESSION [] +POSTHOOK: Lineage: groupby_decimal64_1b.key EXPRESSION [] +PREHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('9075-06-13 16:20:09',32030.01) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: query: insert into groupby_decimal64_1b values ('2018-07-08 10:53:27.252',800) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b +POSTHOOK: Lineage: groupby_decimal64_1b.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_decimal64_1b_nonull(c_timestamp timestamp, key decimal(8,2)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_decimal64_1b_nonull.txt' OVERWRITE INTO TABLE groupby_decimal64_1b_nonull +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-06 00:42:30.91',31713.02) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: query: insert into groupby_decimal64_1b_nonull values ('1970-05-08 45:59:00.0',34) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_decimal64_1b_nonull +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.c_timestamp SCRIPT [] +POSTHOOK: Lineage: groupby_decimal64_1b_nonull.key SCRIPT [] +PREHOOK: query: select key, count(key) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from 
groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 2 +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 1 +0.000 1 +23.220 1 +324.330 2 +33.440 1 +333.330 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +800.000 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,3)/DECIMAL_64) -> 2:decimal(6,3) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) +
mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,3)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(6,3)] + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 
1220 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-0.342 +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +NULL +PREHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a +#### A masked pattern was here #### +-87.200 +0.000 +23.220 +324.330 +33.440 +333.330 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +800.000 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 2 +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 +55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1a_nonull where key != -0.342 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 1 +-87.200 1 +0.000 1 +100.000 1 +23.220 1 +324.330 2 +33.440 1 +435.330 1 +435.331 1 +44.200 2 
+55.300 3 +55.330 1 +66.400 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1a_nonull + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:decimal(6,3)/DECIMAL_64, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: decimal(6,3)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(6,3)/DECIMAL_64) -> 2:decimal(6,3) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(6,3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(6,3)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:decimal(6,3)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(6,3)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(6,3)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde:
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(6,3)] + Reduce Output Operator + key expressions: _col0 (type: decimal(6,3)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(6,3) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(6,3)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-0.342 +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1a_nonull where key != -0.342 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1a_nonull +#### A masked pattern was here #### +-76.200 +-87.200 +0.000 +100.000 +23.220 +324.330 +33.440 +435.330 +435.331 +44.200 +55.300 +55.330 +66.400 +PREHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends 
on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + aggregators: VectorUDAFCount(col 1:decimal(8,2)/DECIMAL_64) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_decimal64_1b group by key
+PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + value
expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b where key != 11041.913 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: c_timestamp (type: timestamp), key (type: decimal(8,2)) + outputColumnNames:
c_timestamp, key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_timestamp) + Group By Vectorization: + aggregators: VectorUDAFCount(col 0:timestamp) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 
+32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +NULL 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b where key != 11041.91 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +32030.01 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +800.00 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns:
c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(8,2)] + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(8,2)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5930 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +NULL +PREHOOK: query: select key from groupby_decimal64_1b where key != 11041.91 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +POSTHOOK: query: select key 
from groupby_decimal64_1b where key != 11041.91 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b +#### A masked pattern was here #### +10402.00 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +32030.01 +3566.02 +645.07 +645.93 +7286.29 +800.00 +8925.82 +9559.53 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 1 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.40 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +PREHOOK: type: QUERY +PREHOOK: 
Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_timestamp) from groupby_decimal64_1b_nonull where key != 2755.40 group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 1 +11041.91 1 +13831.90 1 +15464.67 0 +16966.00 1 +16966.99 1 +1735.22 1 +2516.50 1 +2755.90 1 +31713.02 1 +34.00 1 +3566.02 1 +645.07 1 +645.93 1 +7286.29 1 +8925.82 1 +9559.53 1 +PREHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_decimal64_1b_nonull + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:c_timestamp:timestamp, 1:key:decimal(8,2)/DECIMAL_64, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: decimal(8,2)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConvertDecimal64ToDecimal(col 1:decimal(8,2)/DECIMAL_64) -> 3:decimal(8,2) + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: decimal(8,2)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(8,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [1] + dataColumns: c_timestamp:timestamp, key:decimal(8,2)/DECIMAL_64 + partitionColumnCount: 0 + scratchColumnTypeNames: [decimal(8,2)] + Reduce Vectorization: + enabled: false + enableConditionsMet:
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:decimal(8,2)] + Reduce Output Operator + key expressions: _col0 (type: decimal(8,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:decimal(8,2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(8,2)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.40 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_decimal64_1b_nonull where key != 2755.40 group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_decimal64_1b_nonull +#### A masked pattern was here #### +10402.00 +11041.91 +13831.90 +15464.67 +16966.00 +16966.99 +1735.22 +2516.50 +2755.90 +31713.02 +34.00 +3566.02 +645.07 +645.93 +7286.29 +8925.82 +9559.53 +PREHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_txt +PREHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: CREATE TABLE groupby_string_1a STORED AS ORC AS SELECT * FROM groupby_string_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SIMPLE [(groupby_string_1a_txt)groupby_string_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL 
INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1a_nonull STORED AS ORC AS SELECT * FROM groupby_string_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SIMPLE [(groupby_string_1a_nonull_txt)groupby_string_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: query: insert into groupby_string_1a_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a_nonull +POSTHOOK: Lineage: groupby_string_1a_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a.txt' OVERWRITE INTO TABLE groupby_string_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_txt +PREHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b +POSTHOOK: query: CREATE TABLE groupby_string_1b STORED AS ORC AS SELECT * FROM groupby_string_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@groupby_string_1b +POSTHOOK: Lineage: groupby_string_1b.key SIMPLE [(groupby_string_1b_txt)groupby_string_1b_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1a values (NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values (NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key EXPRESSION [] +PREHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('QNCYBDW') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1a values ('NOT') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1a +POSTHOOK: query: insert into groupby_string_1a values ('NOT') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1a +POSTHOOK: Lineage: groupby_string_1a.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull_txt(key char(4)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1a_nonull.txt' OVERWRITE INTO TABLE groupby_string_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1b_nonull STORED AS ORC AS SELECT * FROM groupby_string_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SIMPLE [(groupby_string_1b_nonull_txt)groupby_string_1b_nonull_txt.FieldSchema(name:key, type:char(4), comment:null), ] +PREHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('PXLD') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: 
groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: insert into groupby_string_1b_nonull values ('AA') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: query: insert into groupby_string_1b_nonull values ('AA') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1b_nonull +POSTHOOK: Lineage: groupby_string_1b_nonull.key SCRIPT [] +PREHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c.txt' OVERWRITE INTO TABLE groupby_string_1c_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_txt +PREHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: CREATE TABLE groupby_string_1c STORED AS ORC AS SELECT * FROM groupby_string_1c_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_date SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SIMPLE [(groupby_string_1c_txt)groupby_string_1c_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2141-02-19', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: 
groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values (NULL, '2018-04-11', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('ATZJTPECF', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('BB', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('CC', '2018-04-12', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: 
groupby_string_1c.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c +POSTHOOK: query: insert into groupby_string_1c values ('DD', '2018-04-14', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c +POSTHOOK: Lineage: groupby_string_1c.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c.s_timestamp EXPRESSION [] +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_string_1c_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_string_1c_nonull.txt' OVERWRITE INTO TABLE groupby_string_1c_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_string_1c_nonull_txt +PREHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_string_1c_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: CREATE TABLE groupby_string_1c_nonull STORED AS ORC AS SELECT * FROM groupby_string_1c_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_string_1c_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SIMPLE [(groupby_string_1c_nonull_txt)groupby_string_1c_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +PREHOOK: type: QUERY +PREHOOK: Input: 
_dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '2144-01-13', '2092-06-07 06:42:30.000538454') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('SDA', '1988-04-23', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('EEE', NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date EXPRESSION [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('FFF', '880-11-01', '22073-03-21 15:32:57.617920888') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp SCRIPT [] +PREHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: query: insert into groupby_string_1c_nonull values ('GGG', '2018-04-15', NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@groupby_string_1c_nonull +POSTHOOK: Lineage: groupby_string_1c_nonull.key SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_date SCRIPT [] +POSTHOOK: Lineage: groupby_string_1c_nonull.s_timestamp EXPRESSION [] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + 
Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 0 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1a group by key +PREHOOK: type: QUERY 
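Note on the plans and results above: every EXPLAIN in this section reports native: false solely because of nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false; the remaining native-GroupBy conditions (hive.vectorized.execution.groupby.native.enabled, single COUNT aggregation or duplicate reduction, HASH mode, no grouping sets) are all met, so the same queries would be eligible for the native path under tez or spark. Separately, the NULL 0 row that count(key) returns above, against the NULL 3 row count(*) returns for the same table just below, is standard SQL COUNT semantics that the native path must preserve: count(col) skips rows where col is NULL, while count(*) counts every row in the group. A minimal HiveQL sketch of that distinction, using a hypothetical table t:

  -- hypothetical table t(key string) holding rows ('a'), ('a'), (NULL)
  SELECT key, count(key), count(*) FROM t GROUP BY key;
  -- expected: group 'a' -> 2 and 2; the NULL group -> 0 and 1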
+PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +NULL 3 +PXLD 3 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a where key != 'PXLD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH 1 +MXGDMBD 1 +NOT 2 +QNCYBDW 3 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 19 Data size: 1580 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + 
Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 748 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +NULL +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a where key != 'PXLD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a +#### A masked pattern was here #### +FTWURVH +MXGDMBD +NOT +QNCYBDW +UA +WXHJ +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: 
type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +MXGDMBD 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1a_nonull where key != 'MXGDMBD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA 1 +FTWURVH 1 +PXLD 4 +QNCYBDW 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1a_nonull + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 1230 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 615 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +AA +FTWURVH +MXGDMBD +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1a_nonull where key != 'MXGDMBD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1a_nonull +#### A masked pattern was 
here #### +AA +FTWURVH +PXLD +QNCYBDW +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A 
masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 0 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +MXGD 1 +NULL 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU 1 +PXLD 3 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 487 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +MXGD +NULL +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b +#### A masked pattern was here #### +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select 
key, count(key) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +MXGD 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1b_nonull where key != 'MXGD' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA 1 +FTWU 1 +PXLD 4 +QNCY 1 +UA 1 +WXHJ 5 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1b_nonull + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: char(4)) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: char(4)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Map-reduce partition columns: _col0 (type: char(4)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 14 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: char(4)) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: char(4)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(4)) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 616 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here 
#### +AA +FTWU +MXGD +PXLD +QNCY +UA +WXHJ +PREHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1b_nonull where key != 'MXGD' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1b_nonull +#### A masked pattern was here #### +AA +FTWU +PXLD +QNCY +UA +WXHJ +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE 
Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 0 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS 
false + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 6 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 5 +BB 1 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_date (type: date) + outputColumnNames: key, s_date + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Group By Operator 
+ aggregations: count(s_date) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(s_date) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 5 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 4 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 
3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string), s_timestamp (type: timestamp) + outputColumnNames: key, s_timestamp + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s_timestamp) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: 
query: select key, count(s_timestamp) from groupby_string_1c group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +NULL 4 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 3 +BB 0 +BDBMW 1 +BEP 2 +CC 1 +CQMTQLI 2 +DD 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 1 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 7851 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + 
inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +NULL +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@groupby_string_1c +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BB +BDBMW +BEP +CC +CQMTQLI +DD +FROPIK +FTWURVH +FYW +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 1 +FFF 1 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 4 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 
'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_date) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 1 +GOYJHW 3 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 3 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 0 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +IWEZJHKE 0 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(s_timestamp) from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + 2 +AARNZRVZQ 2 +ATZJTPECF 2 +BDBMW 1 +BEP 2 +CQMTQLI 2 +EEE 0 +FFF 0 +FROPIK 3 +FTWURVH 1 +FYW 1 +GGG 0 +GOYJHW 2 +GSJPSIYOU 1 +IOQIDQBHU 1 +KL 1 +LOTLS 3 +MXGDMBD 1 +NADANUQMW 1 +QTSRKSKB 1 +SDA 2 +VNRXWQ 2 +WNGFTTY 2 +ZNOUDCR 1 +PREHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_string_1c_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_string_1c_nonull + Statistics: Num rows: 41 Data size: 7048 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 41 Data size: 7048 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 7048 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 41 Data size: 7048 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3438 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 20 Data size: 3438 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 3438 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 3438 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_string_1c_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull 
group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +IWEZJHKE +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_string_1c_nonull where key != 'IWEZJHKE' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_string_1c_nonull +#### A masked pattern was here #### + +AARNZRVZQ +ATZJTPECF +BDBMW +BEP +CQMTQLI +EEE +FFF +FROPIK +FTWURVH +FYW +GGG +GOYJHW +GSJPSIYOU +IOQIDQBHU +KL +LOTLS +MXGDMBD +NADANUQMW +QTSRKSKB +SDA +VNRXWQ +WNGFTTY +ZNOUDCR +PREHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a.txt' OVERWRITE INTO TABLE groupby_serialize_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a +POSTHOOK: query: CREATE TABLE groupby_serialize_1a STORED AS ORC AS SELECT * FROM groupby_serialize_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a +POSTHOOK: Lineage: groupby_serialize_1a.key SIMPLE [(groupby_serialize_1a_txt)groupby_serialize_1a_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull_txt(key timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1a_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked 
pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1a_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1a_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1a_nonull +POSTHOOK: Lineage: groupby_serialize_1a_nonull.key SIMPLE [(groupby_serialize_1a_nonull_txt)groupby_serialize_1a_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b.txt' OVERWRITE INTO TABLE groupby_serialize_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b +POSTHOOK: query: CREATE TABLE groupby_serialize_1b STORED AS ORC AS SELECT * FROM groupby_serialize_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b +POSTHOOK: Lineage: groupby_serialize_1b.c_double SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_smallint SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.c_string SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b.key SIMPLE [(groupby_serialize_1b_txt)groupby_serialize_1b_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, c_string string, c_double double) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull_txt(key timestamp, c_smallint smallint, 
c_string string, c_double double) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@groupby_serialize_1b_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_serialize_1b_nonull.txt' OVERWRITE INTO TABLE groupby_serialize_1b_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@groupby_serialize_1b_nonull_txt +PREHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@groupby_serialize_1b_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: query: CREATE TABLE groupby_serialize_1b_nonull STORED AS ORC AS SELECT * FROM groupby_serialize_1b_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@groupby_serialize_1b_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@groupby_serialize_1b_nonull +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_double SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_double, type:double, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_smallint SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_smallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.c_string SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:c_string, type:string, comment:null), ] +POSTHOOK: Lineage: groupby_serialize_1b_nonull.key SIMPLE [(groupby_serialize_1b_nonull_txt)groupby_serialize_1b_nonull_txt.FieldSchema(name:key, type:timestamp, comment:null), ] +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +PLAN 
VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1a group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 
+2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +NULL 4 +PREHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 17 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 244 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +NULL +PREHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 
23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2082-07-14 04:00:40.695380469 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 1 +2093-04-10 23:36:54.846 3 +2188-06-04 15:03:14.963259704 1 +2299-11-15 16:41:30.401 1 +2306-06-21 11:02:00.143124239 2 +2608-02-23 23:44:02.546440891 1 +2686-05-23 07:46:46.565832918 2 +2898-10-01 22:27:02.000871113 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1a_nonull + 
Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 13 Data size: 520 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + 
enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2082-07-14 04:00:40.695380469 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1a_nonull where key != '2082-07-14 04:00:40.695380469' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1a_nonull +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 +2093-04-10 23:36:54.846 +2188-06-04 15:03:14.963259704 +2299-11-15 16:41:30.401 +2306-06-21 11:02:00.143124239 +2608-02-23 23:44:02.546440891 +2686-05-23 07:46:46.565832918 +2898-10-01 22:27:02.000871113 +PREHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: 
key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(key) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(*) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 2 +PREHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 
1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_smallint (type: smallint) + outputColumnNames: key, c_smallint + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_smallint) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 1 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp), c_string (type: string) + outputColumnNames: key, c_string + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(c_string) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 
15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +NULL 0 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2145-10-15 06:58:42.831 0 +2242-08-04 07:51:46.905 1 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 4 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2391-01-17 15:28:37.00045143 1 +2409-09-23 10:33:27 1 +2461-03-09 09:54:45.000982385 2 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 2 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 1 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 4 +2971-02-14 09:13:19 1 +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction 
IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 47 Data size: 6175 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 23 Data size: 3021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +NULL +PREHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b where key != '2083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b +#### A masked pattern was here #### +1941-10-16 02:19:35.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2145-10-15 06:58:42.831 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2391-01-17 15:28:37.00045143 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2467-05-11 06:04:13.426693647 +2512-10-06 03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 
+2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(key) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 
+2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 
15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_smallint) from groupby_serialize_1b_nonull where key != '2083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 0 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 2 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 1 +1941-10-16 02:19:35.000423663 1 +1957-03-06 09:57:31 1 +1980-09-13 19:57:15 1 +2018-11-25 22:27:55.84 1 +2044-05-02 07:00:03.35 1 +2073-03-21 15:32:57.617920888 1 +2075-10-25 20:32:40.000792874 0 +2083-06-07 09:35:19.383 1 +2105-01-04 16:27:45 1 +2145-10-15 06:58:42.831 1 +2188-06-04 15:03:14.963259704 1 +2242-08-04 07:51:46.905 2 +2266-09-26 06:27:29.000284762 1 +2301-06-03 17:16:19 1 +2304-12-15 15:31:16 7 +2309-01-15 12:43:49 1 +2332-06-14 07:02:42.32 1 +2333-07-28 09:59:26 1 +2338-02-12 09:30:07 1 +2340-12-15 05:15:17.133588982 1 +2357-05-08 07:09:09.000482799 1 +2391-01-17 15:28:37.00045143 1 +2396-04-06 15:39:02.404013577 2 +2409-09-23 10:33:27 3 +2461-03-09 09:54:45.000982385 2 +2462-12-16 23:11:32.633305644 1 +2467-05-11 
06:04:13.426693647 1 +2512-10-06 03:03:03 4 +2535-03-01 05:04:49.000525883 1 +2629-04-07 01:54:11 2 +2637-03-12 22:25:46.385 2 +2686-05-23 07:46:46.565832918 1 +2688-02-06 20:58:42.000947837 1 +2808-07-09 02:10:11.928498854 1 +2829-06-04 08:01:47.836 1 +2861-05-27 07:13:01.000848622 1 +2888-05-08 08:36:55.182302102 2 +2897-08-10 15:21:47.09 1 +2898-12-18 03:37:17 1 +2938-12-21 23:35:59.498 1 +2960-04-12 07:03:42.000366651 1 +2969-01-23 14:08:04.000667259 5 +2971-02-14 09:13:19 1 +PREHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key, count(c_string) from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +PREHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: groupby_serialize_1b_nonull + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: key (type: timestamp) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: key (type: timestamp) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 66 Data size: 9056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 4528 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +1931-12-04 11:13:47.269597392 +1941-10-16 02:19:35.000423663 +1957-03-06 09:57:31 +1980-09-13 19:57:15 +2018-11-25 22:27:55.84 +2044-05-02 07:00:03.35 +2073-03-21 15:32:57.617920888 +2075-10-25 20:32:40.000792874 +2083-06-07 09:35:19.383 +2105-01-04 16:27:45 +2145-10-15 06:58:42.831 +2188-06-04 15:03:14.963259704 +2242-08-04 07:51:46.905 +2266-09-26 06:27:29.000284762 +2301-06-03 17:16:19 +2304-12-15 15:31:16 +2309-01-15 12:43:49 +2332-06-14 07:02:42.32 +2333-07-28 09:59:26 +2338-02-12 09:30:07 +2340-12-15 05:15:17.133588982 +2357-05-08 07:09:09.000482799 +2391-01-17 15:28:37.00045143 +2396-04-06 15:39:02.404013577 +2409-09-23 10:33:27 +2461-03-09 09:54:45.000982385 +2462-12-16 23:11:32.633305644 +2467-05-11 06:04:13.426693647 +2512-10-06 
03:03:03 +2535-03-01 05:04:49.000525883 +2629-04-07 01:54:11 +2637-03-12 22:25:46.385 +2686-05-23 07:46:46.565832918 +2688-02-06 20:58:42.000947837 +2808-07-09 02:10:11.928498854 +2829-06-04 08:01:47.836 +2861-05-27 07:13:01.000848622 +2888-05-08 08:36:55.182302102 +2897-08-10 15:21:47.09 +2898-12-18 03:37:17 +2938-12-21 23:35:59.498 +2960-04-12 07:03:42.000366651 +2969-01-23 14:08:04.000667259 +2971-02-14 09:13:19 +PREHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +POSTHOOK: query: select key from groupby_serialize_1b_nonull where key != '22083-06-07 09:35:19.383' group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@groupby_serialize_1b_nonull +#### A masked pattern was here #### +PREHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@over10k +POSTHOOK: query: CREATE TABLE over10k(t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@over10k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@over10k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/over10k' OVERWRITE INTO TABLE over10k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@over10k +PREHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(s) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(s) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string), ts (type: timestamp) + outputColumnNames: s, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(ts) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: s (type: string) + outputColumnNames: s + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: s (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select s, count(*) from over10k group by s order by s limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +alice allen 8 +alice brown 14 +alice carson 10 +alice davidson 18 +alice ellison 15 +alice falkner 17 +alice garcia 13 +alice hernandez 18 +alice ichabod 22 +alice johnson 12 +PREHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(ts) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 
10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data 
size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(ts) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: d (type: double), ts (type: timestamp) + outputColumnNames: d, ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(d) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + 
featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(d) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 
43 +PREHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: ts (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: 
timestamp) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select ts, count(*) from over10k group by ts order by ts limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +2013-03-01 09:11:58.70307 26 +2013-03-01 09:11:58.703071 50 +2013-03-01 09:11:58.703072 32 +2013-03-01 09:11:58.703073 42 +2013-03-01 09:11:58.703074 45 +2013-03-01 09:11:58.703075 38 +2013-03-01 09:11:58.703076 45 +2013-03-01 09:11:58.703077 50 +2013-03-01 09:11:58.703078 24 +2013-03-01 09:11:58.703079 43 +PREHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator 
+ aggregations: count(dec) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(`dec`) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)), bin (type: binary) + outputColumnNames: dec, bin + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(bin) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic 
stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(bin) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(bin) from over10k group by 
`dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: dec (type: decimal(4,2)) + outputColumnNames: dec + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: dec (type: decimal(4,2)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: _col0 (type: decimal(4,2)) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: decimal(4,2)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: decimal(4,2)) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select `dec`, count(*) from over10k group by `dec` order by `dec` limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +0.01 2 +0.02 1 +0.03 2 +0.04 1 +0.05 1 +0.06 3 +0.07 1 +0.08 3 +0.10 1 +0.11 1 +PREHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
count(i) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + 
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(i) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int), b (type: bigint) + outputColumnNames: i, b + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(b) + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(b) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 
+65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce 
Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i, count(*) from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 45 +65537 35 +65538 29 +65539 24 +65540 29 +65541 43 +65542 37 +65543 40 +65544 42 +65545 39 +PREHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization operator +select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: over10k + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: i (type: int) + outputColumnNames: i + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false + vectorProcessingMode: HASH + keys: i (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 1 Data size: 10175440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 
10175440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select i from over10k group by i order by i limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@over10k +#### A masked pattern was here #### +POSTHOOK: query: select i from over10k group by i order by i limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@over10k +#### A masked pattern was here #### +65536 +65537 +65538 +65539 +65540 +65541 +65542 +65543 +65544 +65545 diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out ql/src/test/results/clientpositive/vector_grouping_sets.q.out index e89b6bc..cdeb456 100644 --- ql/src/test/results/clientpositive/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out @@ -164,6 +164,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: s_store_id (type: string), 0L (type: bigint) @@ -275,6 +277,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 1:string, ConstantVectorExpression(val 0) -> 30:bigint native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No Grouping Sets IS false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string), 0L (type: bigint) diff --git ql/src/test/results/clientpositive/vector_include_no_sel.q.out ql/src/test/results/clientpositive/vector_include_no_sel.q.out index 848823f..0e9cf70 100644 --- ql/src/test/results/clientpositive/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/vector_include_no_sel.q.out @@ -241,6 +241,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash diff --git ql/src/test/results/clientpositive/vector_orderby_5.q.out ql/src/test/results/clientpositive/vector_orderby_5.q.out index 793d99e..8cbc52c 100644 --- ql/src/test/results/clientpositive/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/vector_orderby_5.q.out @@ -141,6 +141,8 @@ STAGE PLANS: groupByMode: HASH keyExpressions: col 7:boolean native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: bo (type: boolean) diff --git 
ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out index a6d87c2..fecf230 100644 --- ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -703,6 +703,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out index 77a5bc7..25b5933 100644 --- ql/src/test/results/clientpositive/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_outer_join2.q.out @@ -343,6 +343,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] mode: hash diff --git ql/src/test/results/clientpositive/vector_outer_join3.q.out ql/src/test/results/clientpositive/vector_outer_join3.q.out index 20f8f4b..4fb9a0c 100644 --- ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -244,7 +244,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: 
string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS 
true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -284,7 +284,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By 
Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: 
string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num 
rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -324,7 +324,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE 
DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One 
MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output 
Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_a","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: 
NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS 
true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index 51ed3a2..b5a4570 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -782,7 +782,7 @@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: 
tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink 
Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: 
NONE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.groupby.native.enabled IS true","Single COUNT aggregation or Duplicate Reduction IS true","Group By Mode HASH IS true","No Grouping Sets IS 
true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"],"vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[]","featureSupportInUse:":"[]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out index 7454c4b..debd4ba 100644 --- ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out +++ ql/src/test/results/clientpositive/vector_outer_join_no_keys.q.out @@ -98,6 +98,8 @@ STAGE PLANS: className: VectorGroupByOperator groupByMode: HASH native: false + nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false vectorProcessingMode: HASH projectedOutputColumnNums: [0] mode: hash @@ -243,6 +245,8 @@ STAGE 
PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
index b46501e..2bff1fd 100644
--- ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
+++ ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out
@@ -61,6 +61,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14)
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
index 8784836..05a7792 100644
--- ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
+++ ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
@@ -98,6 +98,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 0:int, col 1:int
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: []
                  keys: _col0 (type: int), _col1 (type: int)
diff --git ql/src/test/results/clientpositive/vector_string_concat.q.out ql/src/test/results/clientpositive/vector_string_concat.q.out
index bede8a1..6472b22 100644
--- ql/src/test/results/clientpositive/vector_string_concat.q.out
+++ ql/src/test/results/clientpositive/vector_string_concat.q.out
@@ -348,6 +348,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 20:string
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: []
                  keys: _col0 (type: string)
diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out
index 13fb6d1..04260fd 100644
--- ql/src/test/results/clientpositive/vector_when_case_null.q.out
+++ ql/src/test/results/clientpositive/vector_when_case_null.q.out
@@ -56,6 +56,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 0:string
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  keys: _col0 (type: string)
diff --git ql/src/test/results/clientpositive/vectorization_1.q.out ql/src/test/results/clientpositive/vectorization_1.q.out
index bb8e483..7a58051 100644
--- ql/src/test/results/clientpositive/vectorization_1.q.out
+++ ql/src/test/results/clientpositive/vectorization_1.q.out
@@ -81,6 +81,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorization_12.q.out ql/src/test/results/clientpositive/vectorization_12.q.out
index e129730..929983f 100644
--- ql/src/test/results/clientpositive/vectorization_12.q.out
+++ ql/src/test/results/clientpositive/vectorization_12.q.out
@@ -105,6 +105,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
                  keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean)
diff --git ql/src/test/results/clientpositive/vectorization_13.q.out ql/src/test/results/clientpositive/vectorization_13.q.out
index 96eda74..277e22a 100644
--- ql/src/test/results/clientpositive/vectorization_13.q.out
+++ ql/src/test/results/clientpositive/vectorization_13.q.out
@@ -107,6 +107,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                  keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
@@ -437,6 +439,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                  keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out
index 7a7a817..7fb0743 100644
--- ql/src/test/results/clientpositive/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/vectorization_14.q.out
@@ -107,6 +107,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
                  keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean)
diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out
index dbef3e7..3196618 100644
--- ql/src/test/results/clientpositive/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/vectorization_15.q.out
@@ -103,6 +103,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                  keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
diff --git ql/src/test/results/clientpositive/vectorization_16.q.out ql/src/test/results/clientpositive/vectorization_16.q.out
index 571eae0..bee258a 100644
--- ql/src/test/results/clientpositive/vectorization_16.q.out
+++ ql/src/test/results/clientpositive/vectorization_16.q.out
@@ -80,6 +80,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 6:string, col 5:double, col 8:timestamp
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3]
                  keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
diff --git ql/src/test/results/clientpositive/vectorization_2.q.out ql/src/test/results/clientpositive/vectorization_2.q.out
index e3d6ad0..5833f66 100644
--- ql/src/test/results/clientpositive/vectorization_2.q.out
+++ ql/src/test/results/clientpositive/vectorization_2.q.out
@@ -85,6 +85,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorization_3.q.out ql/src/test/results/clientpositive/vectorization_3.q.out
index bb6c014..4a346f4 100644
--- ql/src/test/results/clientpositive/vectorization_3.q.out
+++ ql/src/test/results/clientpositive/vectorization_3.q.out
@@ -90,6 +90,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorization_4.q.out ql/src/test/results/clientpositive/vectorization_4.q.out
index 395431c..6d4524f 100644
--- ql/src/test/results/clientpositive/vectorization_4.q.out
+++ ql/src/test/results/clientpositive/vectorization_4.q.out
@@ -85,6 +85,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorization_5.q.out ql/src/test/results/clientpositive/vectorization_5.q.out
index dfe9715..0419d5d 100644
--- ql/src/test/results/clientpositive/vectorization_5.q.out
+++ ql/src/test/results/clientpositive/vectorization_5.q.out
@@ -78,6 +78,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorization_9.q.out ql/src/test/results/clientpositive/vectorization_9.q.out
index 571eae0..bee258a 100644
--- ql/src/test/results/clientpositive/vectorization_9.q.out
+++ ql/src/test/results/clientpositive/vectorization_9.q.out
@@ -80,6 +80,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 6:string, col 5:double, col 8:timestamp
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3]
                  keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out
index 7474547..96023b1 100644
--- ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -245,6 +245,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 0:tinyint
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1]
                  keys: _col0 (type: tinyint)
@@ -425,6 +427,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 0:tinyint
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: []
                  keys: ctinyint (type: tinyint)
@@ -729,6 +733,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 5:double
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  keys: cdouble (type: double)
diff --git ql/src/test/results/clientpositive/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/vectorization_nested_udf.q.out
index 2c4fa69..c2c40e5 100644
--- ql/src/test/results/clientpositive/vectorization_nested_udf.q.out
+++ ql/src/test/results/clientpositive/vectorization_nested_udf.q.out
@@ -38,6 +38,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out
index 31dcd37..3a1a870 100644
--- ql/src/test/results/clientpositive/vectorized_case.q.out
+++ ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -292,6 +292,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1]
                  mode: hash
@@ -407,6 +409,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
index 50c3448..f0b5be0 100644
--- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
@@ -1240,6 +1240,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out
index d9c781c..68233de 100644
--- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out
+++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out
@@ -93,6 +93,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out
index e9a0e45..66ef9a3 100644
--- ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out
+++ ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out
@@ -114,6 +114,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Single COUNT aggregation or Duplicate Reduction IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out
index fb7198d..014a9cd 100644
--- ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out
+++ ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out
@@ -133,6 +133,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  mode: hash
@@ -307,6 +309,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  mode: hash
@@ -481,6 +485,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
index 3b7de64..ff32512 100644
--- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
+++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
@@ -360,6 +360,8 @@ STAGE PLANS:
                  groupByMode: HASH
                  keyExpressions: col 1:tinyint
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                  keys: _col0 (type: tinyint)
diff --git ql/src/test/results/clientpositive/vectorized_timestamp.q.out ql/src/test/results/clientpositive/vectorized_timestamp.q.out
index b0bfc8b..f337d65 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp.q.out
@@ -135,6 +135,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1]
                  mode: hash
@@ -322,6 +324,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1]
                  mode: hash
@@ -429,6 +433,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2]
                  mode: hash
diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 244aca6..45f00fc 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -732,6 +732,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3]
                  mode: hash
@@ -839,6 +841,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0]
                  mode: hash
@@ -959,6 +963,8 @@ STAGE PLANS:
                  className: VectorGroupByOperator
                  groupByMode: HASH
                  native: false
+                 nativeConditionsMet: hive.vectorized.execution.groupby.native.enabled IS true, Group By Mode HASH IS true, No Grouping Sets IS true
+                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, Single COUNT aggregation or Duplicate Reduction IS false
                  vectorProcessingMode: HASH
                  projectedOutputColumnNums: [0, 1, 2, 3]
                  mode: hash
diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
index fbb89a9..56b2cd4 100644
--- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
+++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
@@ -26,10 +26,13 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
+import org.apache.commons.lang.StringUtils;
 import org.apache.tools.ant.BuildException;
 import org.apache.tools.ant.Task;
@@ -1162,6 +1165,36 @@ //template, , ,
     {"VectorUDAFVarMerge", "VectorUDAFVarPartial2", "PARTIAL2"},
     {"VectorUDAFVarMerge", "VectorUDAFVarFinal", "FINAL"},
+
+    {"GroupByHashSingleKeySingleCountColumnOperator", "VectorGroupByHash", "Long", "KeySingleCountColumnOperator", "SingleCount"},
+    {"GroupByHashSingleKeySingleCountColumnOperator", "VectorGroupByHash", "String", "KeySingleCountColumnOperator", "SingleCount"},
+    {"GroupByHashSingleKeySingleCountColumnOperator", "VectorGroupByHash", "Serialize", "KeySingleCountColumnOperator", "SingleCount"},
+
+    {"GroupByHashSingleKeySingleCountKeyOperator", "VectorGroupByHash", "Long", "KeySingleCountKeyOperator", "SingleCount"},
+    {"GroupByHashSingleKeySingleCountKeyOperator", "VectorGroupByHash", "String", "KeySingleCountKeyOperator", "SingleCount"},
+    {"GroupByHashSingleKeySingleCountKeyOperator", "VectorGroupByHash", "Serialize", "KeySingleCountKeyOperator", "SingleCount"},
+
+    {"GroupByHashSingleKeySingleCountStarOperator", "VectorGroupByHash", "Long", "KeySingleCountStarOperator", "SingleCount"},
+    {"GroupByHashSingleKeySingleCountStarOperator", "VectorGroupByHash", "String", "KeySingleCountStarOperator", "SingleCount"},
+    {"GroupByHashSingleKeySingleCountStarOperator", "VectorGroupByHash", "Serialize", "KeySingleCountStarOperator", "SingleCount"},
+
+    {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "Long", "KeyDuplicateReductionOperator", "DuplicateReduction"},
+    {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "String", "KeyDuplicateReductionOperator", "DuplicateReduction"},
+    {"GroupByHashSingleKeyDuplicateReductionOperator", "VectorGroupByHash", "Serialize", "KeyDuplicateReductionOperator", "DuplicateReduction"},
+
+
+    {"GroupByHashSingleKeyDecimal64Operator", "VectorGroupByHash", "Decimal64", "KeySingleCountColumnOperator", "SingleCount"},
+    {"GroupByHashSingleKeyDecimal64Operator", "VectorGroupByHash", "Decimal64", "KeySingleCountKeyOperator", "SingleCount"},
+    {"GroupByHashSingleKeyDecimal64Operator", "VectorGroupByHash", "Decimal64", "KeySingleCountStarOperator", "SingleCount"},
+
+    {"GroupByHashSingleKeyDecimal64Operator", "VectorGroupByHash", "Decimal64", "KeyDuplicateReductionOperator", "DuplicateReduction"},
+
+    {"GroupByHashMultiKeySingleCountColumnOperator", "VectorGroupByHash", "Multi", "KeySingleCountColumnOperator", "SingleCount"},
+    {"GroupByHashMultiKeySingleCountKeyOperator", "VectorGroupByHash", "Multi", "KeySingleCountKeyOperator", "SingleCount"},
+    {"GroupByHashMultiKeySingleCountStarOperator", "VectorGroupByHash", "Multi", "KeySingleCountStarOperator", "SingleCount"},
+
+    {"GroupByHashMultiKeyDuplicateReductionOperator", "VectorGroupByHash", "Multi", "KeyDuplicateReductionOperator", "DuplicateReduction"},
+
   };
@@ -1174,6 +1207,11 @@ private String udafOutputDirectory;
 private String udafClassesDirectory;
 private String udafTemplateDirectory;
+
+  private String groupByOperatorOutputDirectory;
+  private String groupByOperatorClassesDirectory;
+  private String groupByOperatorTemplateDirectory;
+
 private GenVectorTestCode testCodeGen;

 static String joinPath(String...parts) {
@@ -1210,6 +1248,16 @@ public void init(String templateBaseDir, String buildDir) {
     udafTemplateDirectory = joinPath(generationDirectory.getAbsolutePath(), "UDAFTemplates");
+    String groupByOperator = joinPath("org", "apache", "hadoop",
+        "hive", "ql", "exec", "vector", "groupby", "operator", "gen");
+    File groupByOperatorOutput = new File(joinPath(buildPath, groupByOperator));
+    File groupByOperatorClasses = new File(joinPath(compiledPath, groupByOperator));
+    groupByOperatorOutputDirectory = groupByOperatorOutput.getAbsolutePath();
+    groupByOperatorClassesDirectory = groupByOperatorClasses.getAbsolutePath();
+
+    groupByOperatorTemplateDirectory =
+        joinPath(generationDirectory.getAbsolutePath(), "GroupByOperatorTemplates");
+
     File testCodeOutput = new File(
         joinPath(buildDir, "generated-test-sources", "java", "org",
@@ -1433,6 +1481,18 @@ private void generate() throws Exception {
     } else if (tdesc[0].equals("TimestampArithmeticDate")) {
       generateTimestampArithmeticDate(tdesc);
+    } else if (
+        tdesc[0].equals("GroupByHashSingleKeyOperatorBase") ||
+        tdesc[0].equals("GroupByHashSingleKeyDecimal64Operator") ||
+        tdesc[0].equals("GroupByHashSingleKeySingleCountColumnOperator") ||
+        tdesc[0].equals("GroupByHashSingleKeySingleCountKeyOperator") ||
+        tdesc[0].equals("GroupByHashSingleKeySingleCountStarOperator") ||
+        tdesc[0].equals("GroupByHashSingleKeyDuplicateReductionOperator") ||
+        tdesc[0].equals("GroupByHashMultiKeySingleCountColumnOperator") ||
+        tdesc[0].equals("GroupByHashMultiKeySingleCountKeyOperator") ||
+        tdesc[0].equals("GroupByHashMultiKeySingleCountStarOperator") ||
+        tdesc[0].equals("GroupByHashMultiKeyDuplicateReductionOperator")) {
+      generateGroupByOperator(tdesc);
     } else {
       continue;
     }
@@ -3492,35 +3552,133 @@ private static boolean isTimestampIntervalType(String type) {
         || type.equals("interval_day_time"));
   }

-  private boolean containsDefinedStrings(Set<String> defineSet, String commaDefinedString) {
-    String[] definedStrings = commaDefinedString.split(",");
-    boolean result = false;
-    for (String definedString : definedStrings) {
-      if (defineSet.contains(definedString)) {
-        result = true;
-        break;
+  private void generateGroupByOperator(String[] tdesc) throws Exception {
+    String templateName = tdesc[0];
+    String classNamePrefix = tdesc[1];
+    String singleKeyVariation = tdesc[2];
+    String classNameSuffix = tdesc[3];
+    String aggregationVariation = tdesc[4];
+
+    // Read the template into a string.
+    String className = classNamePrefix + singleKeyVariation + classNameSuffix;
+    File templateFile =
+        new File(joinPath(this.groupByOperatorTemplateDirectory, templateName + ".txt"));
+    String templateString = readFile(templateFile);
+
+    final String defineName = singleKeyVariation.toUpperCase() + "_KEY";
+    templateString = evaluateIfDefined(templateString, defineName,
+        this.groupByOperatorTemplateDirectory);
+
+    templateString = templateString.replaceAll("<ClassName>", className);
+    final String keyColumnVectorType;
+    if (singleKeyVariation.equals("Long") || singleKeyVariation.equals("Decimal64")) {
+      keyColumnVectorType = "LongColumnVector";
+    } else if (singleKeyVariation.equals("String")) {
+      keyColumnVectorType = "BytesColumnVector";
+    } else {
+      keyColumnVectorType = "ColumnVector";
+    }
+    templateString = templateString.replaceAll("<KeyVariation>", singleKeyVariation);
+    templateString = templateString.replaceAll("<keyVariation>", singleKeyVariation.toLowerCase());
+    templateString = templateString.replaceAll("<AggregationVariation>", aggregationVariation);
+    templateString =
+        templateString.replaceAll("<aggregationVariation>", aggregationVariation.toLowerCase());
+    templateString = templateString.replaceAll("<KeyColumnVectorType>", keyColumnVectorType);
+    templateString = templateString.replaceAll("<ClassNameSuffix>", classNameSuffix);
+
+    writeFile(templateFile.lastModified(), groupByOperatorOutputDirectory, groupByOperatorClassesDirectory,
+        className, templateString);
+  }
+
+  private boolean matchesDefinedStrings(Set<String> defineSet, Set<String> newIfDefinedSet,
+      IfDefinedMode ifDefinedMode) {
+    switch (ifDefinedMode) {
+    case SINGLE:
+    case AND_ALL:
+      for (String candidateString : newIfDefinedSet) {
+        if (!defineSet.contains(candidateString)) {
+          return false;
+        }
+      }
+      return true;
+    case OR_ANY:
+      for (String candidateString : newIfDefinedSet) {
+        if (defineSet.contains(candidateString)) {
+          return true;
+        }
       }
+      return false;
+    default:
+      throw new RuntimeException("Unexpected if defined mode " + ifDefinedMode);
     }
-    return result;
   }

-  private int doIfDefinedStatement(String[] lines, int index, Set<String> definedSet,
-      boolean outerInclude, StringBuilder sb) {
-    String ifLine = lines[index];
+  public enum IfDefinedMode {
+    SINGLE,
+    AND_ALL,
+    OR_ANY;
+  }
+
+  private IfDefinedMode parseIfDefinedMode(String newIfDefinedString, Set<String> newIfDefinedSet) {
+    final String[] newIfDefinedStrings;
+    final IfDefinedMode ifDefinedMode;
+    int index = newIfDefinedString.indexOf("&&");
+    if (index != -1) {
+      newIfDefinedStrings = newIfDefinedString.split("&&");
+      ifDefinedMode = IfDefinedMode.AND_ALL;
+    } else {
+      index = newIfDefinedString.indexOf("||");
+      if (index == -1) {
+
+        // One element.
+        newIfDefinedSet.add(newIfDefinedString);
+        return IfDefinedMode.SINGLE;
+      } else {
+        newIfDefinedStrings = newIfDefinedString.split("\\|\\|");
+        ifDefinedMode = IfDefinedMode.OR_ANY;
+      }
+    }
+    for (String newDefinedString : newIfDefinedStrings) {
+      newIfDefinedSet.add(newDefinedString);
+    }
+    return ifDefinedMode;
+  }
+
+  private int doIfDefinedStatement(List<String> linesList, int index,
+      Set<String> desiredIfDefinedSet, boolean outerInclude,
+      List<String> ifDefinedEvaluatedLinesList, boolean isExactFilter, boolean filterPredicate) {
+    String ifLine = linesList.get(index);
     final int ifLineNumber = index + 1;
-    String commaDefinedString = ifLine.substring("#IF ".length());
-    boolean includeBody = containsDefinedStrings(definedSet, commaDefinedString);
+    String ifDefinedString = ifLine.substring("#IF ".length());
+    Set<String> ifDefinedSet = new HashSet<String>();
+    IfDefinedMode ifDefinedMode = parseIfDefinedMode(ifDefinedString, ifDefinedSet);
+
+    boolean includeBody;
+    final boolean isExactMatch;
+    if (isExactFilter) {

+      // Normally, we throw away any #IF statements that don't match the desired set.
+      // But optionally, we filter on exact #IF/#ELSE/#ENDIF statements and let all others through.
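+      //
+      // A minimal sketch of the exact-filter semantics, using the template syntax this
+      // class parses (the define names here are only illustrative):
+      //
+      //   #IF LONG_KEY
+      //   ... long-key body ...
+      //   #ELSE
+      //   ... fallback body ...
+      //   #ENDIF LONG_KEY
+      //
+      // With desiredIfDefinedSet = {LONG_KEY} and filterPredicate = true, the #IF body is
+      // kept and the #ELSE body dropped; a block guarded by a different set, for example
+      // #IF STRING_KEY, is not an exact match, so both of its bodies pass through.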
+      isExactMatch = desiredIfDefinedSet.equals(ifDefinedSet);
+      if (isExactMatch) {
+        includeBody = filterPredicate;
+      } else {
+        includeBody = true;
+      }
+    } else {
+      includeBody = matchesDefinedStrings(desiredIfDefinedSet, ifDefinedSet, ifDefinedMode);
+      isExactMatch = false;
+    }
+
     index++;
-    final int end = lines.length;
+    final int end = linesList.size();
     while (true) {
       if (index >= end) {
-        throw new RuntimeException("Unmatched #IF at line " + index + " for " + commaDefinedString);
+        throw new RuntimeException("Unmatched #IF at line " + index + " for " + ifDefinedString);
       }
-      String line = lines[index];
+      String line = linesList.get(index);
       if (line.length() == 0 || line.charAt(0) != '#') {
         if (outerInclude && includeBody) {
-          sb.append(line);
-          sb.append("\n");
+          ifDefinedEvaluatedLinesList.add(line);
         }
         index++;
         continue;
@@ -3529,63 +3687,375 @@ private int doIfDefinedStatement(String[] lines, int index, Set<String> definedS
       // A pound # statement (IF/ELSE/ENDIF).
       if (line.startsWith("#IF ")) {
         // Recurse.
-        index = doIfDefinedStatement(lines, index, definedSet, outerInclude && includeBody, sb);
+        index =
+            doIfDefinedStatement(
+                linesList, index, desiredIfDefinedSet, outerInclude && includeBody,
+                ifDefinedEvaluatedLinesList, isExactFilter, filterPredicate);
       } else if (line.equals("#ELSE")) {
+
+        // Flip inclusion.
-        includeBody = !includeBody;
+        if (isExactFilter) {
+          if (isExactMatch) {
+            includeBody = !includeBody;
+          }
+        } else {
+          includeBody = !includeBody;
+        }
         index++;
       } else if (line.equals("#ENDIF")) {
         throw new RuntimeException("Missing defined strings with #ENDIF on line " + (index + 1));
       } else if (line.startsWith("#ENDIF ")) {
         String endCommaDefinedString = line.substring("#ENDIF ".length());
-        if (!commaDefinedString.equals(endCommaDefinedString)) {
+        if (!ifDefinedString.equals(endCommaDefinedString)) {
           throw new RuntimeException(
               "#ENDIF defined names \"" + endCommaDefinedString + "\" (line " + ifLineNumber +
-              " do not match \"" + commaDefinedString + "\" (line " + (index + 1) + ")");
+              ") do not match \"" + ifDefinedString + "\" (line " + (index + 1) + ")");
         }
         return ++index;
+      } else if (
+          !line.startsWith("#BEGIN_LINES ") &&
+          !line.startsWith("#END_LINES") &&
+          !line.startsWith("#USE_LINES ") &&
+          !line.startsWith("#COMMENT")) {
+        throw new RuntimeException(
+            "Problem with #IF #ELSE #ENDIF on line " + (index + 1) + ": " + line);
       } else {
-        throw new RuntimeException("Problem with #IF/#ELSE/#ENDIF on line " + (index + 1) + ": " + line);
+        if (outerInclude && includeBody) {
+          ifDefinedEvaluatedLinesList.add(line);
+        }
+        index++;
+        continue;
       }
     }
   }

-  private void doEvaluateIfDefined(String[] lines, int index, Set<String> definedSet,
-      boolean outerInclude, StringBuilder sb) {
-    final int end = lines.length;
+  private void doProcessIfDefined(List<String> linesList, int index, Set<String> definedSet,
+      boolean outerInclude, List<String> ifDefinedEvaluatedLinesList,
+      boolean isExactFilter, boolean predicate) {
+    final int end = linesList.size();
     while (true) {
       if (index >= end) {
         break;
       }
-      String line = lines[index];
+      String line = linesList.get(index);
       if (line.length() == 0 || line.charAt(0) != '#') {
         if (outerInclude) {
-          sb.append(line);
-          sb.append("\n");
+          ifDefinedEvaluatedLinesList.add(line);
         }
         index++;
         continue;
       }
-      // A pound # statement (IF/ELSE/ENDIF).
       if (line.startsWith("#IF ")) {
-        index = doIfDefinedStatement(lines, index, definedSet, outerInclude, sb);
+
+        // A pound # statement (#IF #ELSE #ENDIF).
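+        // Nesting is handled by recursion: doIfDefinedStatement consumes everything up to
+        // its matching "#ENDIF <names>", recursing again on any inner #IF it encounters.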
 
-  private void doEvaluateIfDefined(String[] lines, int index, Set<String> definedSet,
-      boolean outerInclude, StringBuilder sb) {
-    final int end = lines.length;
+  private void doProcessIfDefined(List<String> linesList, int index, Set<String> definedSet,
+      boolean outerInclude, List<String> ifDefinedEvaluatedLinesList,
+      boolean isExactFilter, boolean predicate) {
+    final int end = linesList.size();
     while (true) {
       if (index >= end) {
         break;
       }
-      String line = lines[index];
+      String line = linesList.get(index);
       if (line.length() == 0 || line.charAt(0) != '#') {
         if (outerInclude) {
-          sb.append(line);
-          sb.append("\n");
+          ifDefinedEvaluatedLinesList.add(line);
         }
         index++;
         continue;
       }
 
-      // A pound # statement (IF/ELSE/ENDIF).
       if (line.startsWith("#IF ")) {
-        index = doIfDefinedStatement(lines, index, definedSet, outerInclude, sb);
+
+        // A pound # statement (#IF #ELSE #ENDIF).
+        index =
+            doIfDefinedStatement(
+                linesList, index, definedSet, outerInclude,
+                ifDefinedEvaluatedLinesList, isExactFilter, predicate);
+      } else if (
+          !line.startsWith("#BEGIN_LINES ") &&
+          !line.startsWith("#END_LINES") &&
+          !line.startsWith("#USE_LINES ") &&
+          !line.startsWith("#COMMENT")) {
+        throw new RuntimeException(
+            "Problem with #IF #ELSE #ENDIF on line " + (index + 1) + ": " + line);
+      } else {
+        if (outerInclude) {
+          ifDefinedEvaluatedLinesList.add(line);
+        }
+        index++;
+      }
+    }
+  }
+
+  private void doUseLinesCollectAndFilter(List<String> linesList,
+      Map<String, List<String>> useLinesMap, List<String> filteredLinesList) {
+
+    int index = 0;
+    final int size = linesList.size();
+    while (true) {
+
+      if (index >= size) {
+        return;
+      }
+      String line = linesList.get(index);
+      if (line.startsWith("#BEGIN_LINES ")) {
+
+        final int beginLineIndex = index;
+        String linesTitle = line.substring("#BEGIN_LINES ".length());
+        if (useLinesMap.containsKey(linesTitle)) {
+          throw new RuntimeException(
+              "Problem with #BEGIN_LINES that started at " + beginLineIndex +
+              " -- duplicate name " + linesTitle);
+        }
+        while (true) {
+          if (index >= size) {
+            throw new RuntimeException(
+                "Problem with #BEGIN_LINES that started at " + beginLineIndex +
+                " -- no matching #END_LINES found");
+          }
+          line = linesList.get(index);
+          if (line.startsWith("#END_LINES")) {
+            useLinesMap.put(linesTitle, linesList.subList(beginLineIndex + 1, index));
+            break;
+          }
+          index++;
+        }
+      } else if (line.startsWith("#COMMENT")) {
+        // Filter out comment lines.
+      } else {
+        filteredLinesList.add(line);
+      }
+      index++;
+    }
+  }
+
+  private void doUseLinesApply(List<String> linesList, Map<String, List<String>> useLinesMap,
+      List<String> resultLinesList) {
+
+    int index = 0;
+    final int size = linesList.size();
+    while (true) {
+
+      if (index >= size) {
+        return;
+      }
+      String line = linesList.get(index);
+      if (line.startsWith("#USE_LINES ")) {
+
+        String linesTitle = line.substring("#USE_LINES ".length());
+        final int blankCharIndex = linesTitle.indexOf(" ");
+        int pad = 0;
+        if (blankCharIndex != -1) {
+          String remainder = linesTitle.substring(blankCharIndex + 1);
+          linesTitle = linesTitle.substring(0, blankCharIndex);
+          if (!remainder.startsWith("+")) {
+            throw new RuntimeException(
+                "Problem with #USE_LINES that started at " + index +
+                " -- expecting + sign for indent");
+          }
+          String padString = remainder.substring(1);
+          pad = Integer.valueOf(padString);
+        }
+        List<String> useLines = useLinesMap.get(linesTitle);
+        if (useLines == null) {
+          throw new RuntimeException(
+              "Problem with #USE_LINES that started at " + index +
+              " -- name " + linesTitle + " not found");
+        }
+        if (pad == 0) {
+          resultLinesList.addAll(useLines);
         } else {
-          throw new RuntimeException("Problem with #IF/#ELSE/#ENDIF on line " + (index + 1) + ": " + line);
+          String padoutString = StringUtils.leftPad("", pad);
+          for (String useLine : useLines) {
+            resultLinesList.add(padoutString + useLine);
+          }
         }
+      } else {
+        resultLinesList.add(line);
       }
+      index++;
+    }
   }
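Review note, not part of the patch: the #BEGIN_LINES name ... #END_LINES / #USE_LINES name [+N] pair works in two passes -- first collect named blocks (dropping their definitions and #COMMENT lines), then splice each block back in at its #USE_LINES site, left-padded by the optional +N. Because collection runs fully before application, a block may be defined after the site that uses it. A compact sketch under those assumptions; block name and lines are invented, and error handling is omitted:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class UseLinesDemo {
      public static void main(String[] args) {
        List<String> lines = Arrays.asList(
            "#BEGIN_LINES copyKey",
            "keyOut = keyIn;",
            "#END_LINES",
            "before",
            "#USE_LINES copyKey +4",
            "after");

        // Pass 1: collect named blocks and drop the #BEGIN/#END markers.
        Map<String, List<String>> blocks = new HashMap<String, List<String>>();
        List<String> filtered = new ArrayList<String>();
        for (int i = 0; i < lines.size(); i++) {
          String line = lines.get(i);
          if (line.startsWith("#BEGIN_LINES ")) {
            String name = line.substring("#BEGIN_LINES ".length());
            List<String> body = new ArrayList<String>();
            while (!lines.get(++i).startsWith("#END_LINES")) {
              body.add(lines.get(i));
            }
            blocks.put(name, body);
          } else {
            filtered.add(line);
          }
        }

        // Pass 2: expand #USE_LINES, applying the optional "+N" left indent.
        List<String> result = new ArrayList<String>();
        for (String line : filtered) {
          if (line.startsWith("#USE_LINES ")) {
            String[] parts = line.substring("#USE_LINES ".length()).split(" ");
            int pad = parts.length > 1 ? Integer.parseInt(parts[1].substring(1)) : 0;
            char[] indent = new char[pad];
            Arrays.fill(indent, ' ');
            for (String body : blocks.get(parts[0])) {
              result.add(new String(indent) + body);
            }
          } else {
            result.add(line);
          }
        }
        System.out.println(result);  // prints [before,     keyOut = keyIn;, after]
      }
    }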
 
-  private String evaluateIfDefined(String linesString, List<String> definedList) {
+  private void doIncludeProcessing(String[] lines, String templateDirectory,
+      List<String> resultList) throws IOException {
+
+    // Just one level.
+    int index = 0;
+    final int size = lines.length;
+    while (true) {
+
+      if (index >= size) {
+        return;
+      }
+      String line = lines[index];
+      if (line.startsWith("#INCLUDE ")) {
+        String includeFileName = line.substring("#INCLUDE ".length());
+        File includeFile;
+        String includeString;
+        final int blankCharIndex = includeFileName.indexOf(" ");
+        if (blankCharIndex != -1) {
+          String remainder = includeFileName.substring(blankCharIndex + 1);
+          includeFileName = includeFileName.substring(0, blankCharIndex);
+
+          includeFile =
+              new File(joinPath(templateDirectory, includeFileName + ".txt"));
+          includeString = readFile(includeFile);
+
+          // Process the optional comma separated parameters.
+          String[] parameters = remainder.split(",");
+          List<String> filterIfDefinedList = new ArrayList<String>();
+          List<Boolean> filterIfPredicateList = new ArrayList<Boolean>();
+          List<String> substitutionNames = new ArrayList<String>();
+          List<String> substitutions = new ArrayList<String>();
+          for (String parameter : parameters) {
+            char firstChar = parameter.charAt(0);
+            if (Character.isUpperCase(firstChar)) {
+
+              // #IF filter.
+              final int equalsCharIndex = parameter.indexOf("=");
+              if (equalsCharIndex == -1) {
+                throw new RuntimeException(
+                    "Problem with #INCLUDE #IF filter on line " + (index + 1) +
+                    " -- no '='");
+              }
+              String filterIfDefinedName = parameter.substring(0, equalsCharIndex);
+              String predicateString = parameter.substring(equalsCharIndex + 1);
+              final boolean predicate;
+              if (predicateString.equalsIgnoreCase("true")) {
+                predicate = true;
+              } else if (predicateString.equalsIgnoreCase("false")) {
+                predicate = false;
+              } else {
+                throw new RuntimeException(
+                    "Problem with #INCLUDE #IF filter on line " + (index + 1) +
+                    " -- expecting 'true' or 'false'");
+              }
+
+              filterIfDefinedList.add(filterIfDefinedName);
+              filterIfPredicateList.add(predicate);
+            } else if (firstChar == '<') {
+
+              // Substitution.
+              final int closeCharIndex = parameter.indexOf(">");
+              if (closeCharIndex == -1) {
+                throw new RuntimeException(
+                    "Problem with #INCLUDE substitution specification on line " + (index + 1) +
+                    " -- no '>'");
+              }
+              // Keep the <>.
+              String substitutionName = parameter.substring(0, closeCharIndex + 1);
+
+              char equalsChar = parameter.charAt(closeCharIndex + 1);
+              if (equalsChar != '=') {
+                throw new RuntimeException(
+                    "Problem with #INCLUDE substitution specification on line " + (index + 1) +
+                    " -- expecting '='");
+              }
+              final int substitutionIndex = closeCharIndex + 2;
+              char startQuote = parameter.charAt(substitutionIndex);
+              if (startQuote != '"') {
+                throw new RuntimeException(
+                    "Problem with #INCLUDE substitution specification on line " + (index + 1) +
+                    " -- missing start quote '\"'");
+              }
+              final int parameterSize = parameter.length();
+              char endQuote = parameter.charAt(parameterSize - 1);
+              if (endQuote != '"') {
+                throw new RuntimeException(
+                    "Problem with #INCLUDE substitution specification on line " + (index + 1) +
+                    " -- missing end quote '\"'");
+              }
+              String substitution = parameter.substring(substitutionIndex + 1, parameterSize - 1);
+
+              substitutionNames.add(substitutionName);
+              substitutions.add(substitution);
+            }
+          }
+
+          // Example:
+          //
+          //   #INCLUDE file LOGICAL_BATCH_PROCESSING=true,<Logical>="Logical",<logical>="logical"
+          //
+          final int filterCount = filterIfDefinedList.size();
+          for (int f = 0; f < filterCount; f++) {
+
+            // Only process any #IF/#ELSE/#ENDIF that are exact matches.
+            includeString =
+                exactFilterIfDefined(
+                    includeString, filterIfDefinedList.get(f), filterIfPredicateList.get(f));
+          }
+          final int substitutionCount = substitutionNames.size();
+          for (int s = 0; s < substitutionCount; s++) {
+            includeString =
+                includeString.replaceAll(
+                    substitutionNames.get(s), substitutions.get(s));
+          }
+        } else {
+          includeFile =
+              new File(joinPath(templateDirectory, includeFileName + ".txt"));
+          includeString = readFile(includeFile);
+        }
+        String[] includeLines = includeString.split("\n");
+        List<String> includeLinesList = Arrays.asList(includeLines);
+        resultList.addAll(includeLinesList);
+      } else {
+        resultList.add(line);
+      }
+      index++;
+    }
+  }
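Review note, not part of the patch: an #INCLUDE line splits its comma-separated parameters into exact-#IF filters (NAME=true|false, detected by an uppercase first letter) and <Token>="value" substitutions (the angle brackets are kept so a plain replaceAll can do the substitution). A small sketch of that split; the parameter text mirrors the example comment above, and the error handling of the patch is omitted:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class IncludeParamsDemo {
      public static void main(String[] args) {
        String remainder = "LOGICAL_BATCH_PROCESSING=true,<Logical>=\"Logical\",<logical>=\"logical\"";
        Map<String, Boolean> filters = new LinkedHashMap<String, Boolean>();
        Map<String, String> substitutions = new LinkedHashMap<String, String>();
        for (String parameter : remainder.split(",")) {
          if (parameter.charAt(0) == '<') {
            int close = parameter.indexOf('>');
            String name = parameter.substring(0, close + 1);    // keep the <>
            // Skip the ="... that follows the token and drop the trailing quote.
            String value = parameter.substring(close + 3, parameter.length() - 1);
            substitutions.put(name, value);
          } else {
            int equals = parameter.indexOf('=');
            filters.put(parameter.substring(0, equals),
                Boolean.valueOf(parameter.substring(equals + 1)));
          }
        }
        System.out.println(filters);        // prints {LOGICAL_BATCH_PROCESSING=true}
        System.out.println(substitutions);  // prints {<Logical>=Logical, <logical>=logical}
      }
    }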
+
+  private String processIfDefined(String linesString, List<String> definedList,
+      String templateDirectory) throws IOException {
+    return processIfDefined(
+        linesString, definedList, templateDirectory,
+        /* isExactFilter */ false, /* filterPredicate */ false);
+  }
+
+  private String processIfDefined(String linesString, List<String> definedList,
+      String templateDirectory, boolean isExactFilter, boolean filterPredicate) throws IOException {
+
+    String[] lines = linesString.split("\n");
     Set<String> definedSet = new HashSet<String>(definedList);
+
+    List<String> includedLinesList;
+    if (templateDirectory == null) {
+      includedLinesList = Arrays.asList(lines);
+    } else {
+      includedLinesList = new ArrayList<String>();
+      doIncludeProcessing(lines, templateDirectory, includedLinesList);
+    }
+
+    List<String> ifDefinedEvaluatedLinesList = new ArrayList<String>();
+    doProcessIfDefined(
+        includedLinesList, 0, definedSet, true, ifDefinedEvaluatedLinesList,
+        isExactFilter, filterPredicate);
+
+    Map<String, List<String>> useLinesMap = new HashMap<String, List<String>>();
+    List<String> filteredLinesList = new ArrayList<String>();
+    doUseLinesCollectAndFilter(ifDefinedEvaluatedLinesList, useLinesMap, filteredLinesList);
+
+    List<String> resultLinesList;
+    if (useLinesMap.isEmpty()) {
+      resultLinesList = filteredLinesList;
+    } else {
+      resultLinesList = new ArrayList<String>();
+      doUseLinesApply(filteredLinesList, useLinesMap, resultLinesList);
+    }
+
     StringBuilder sb = new StringBuilder();
-    doEvaluateIfDefined(lines, 0, definedSet, true, sb);
+    for (String line : resultLinesList) {
+      sb.append(line);
+      sb.append("\n");
+    }
     return sb.toString();
   }
 
-  private String evaluateIfDefined(String linesString, String definedString) {
-    return evaluateIfDefined(linesString, Arrays.asList(definedString.split(",")));
+  private String evaluateIfDefined(String linesString, List<String> definedList)
+      throws IOException {
+    return processIfDefined(linesString, definedList, null);
+  }
+
+  private String evaluateIfDefined(String linesString, String definedString)
+      throws IOException {
+    return processIfDefined(
+        linesString, Arrays.asList(definedString.split(",")), null);
+  }
+
+  private String exactFilterIfDefined(String linesString, String definedString,
+      boolean filterPredicate) throws IOException {
+    return processIfDefined(
+        linesString, Arrays.asList(definedString.split(",")), null, true, filterPredicate);
+  }
+
+  private String evaluateIfDefined(String linesString, String definedString,
+      String templateDirectory) throws IOException {
+    return processIfDefined(
+        linesString, Arrays.asList(definedString.split(",")), templateDirectory);
   }
 
   static void writeFile(long templateTime, String outputDir, String classesDir,